1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 32593348eSBarry Smith /* 4b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 52593348eSBarry Smith matrix storage format. 62593348eSBarry Smith */ 770f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 81a6a6d98SBarry Smith #include "src/inline/spops.h" 9325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 103b547af2SSatish Balay 11b01c7715SBarry Smith #include "src/inline/ilu.h" 12b01c7715SBarry Smith 13b01c7715SBarry Smith #undef __FUNCT__ 1443516a2dSKris Buschelman #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15bc08b0f1SBarry Smith /*@ 1643516a2dSKris Buschelman MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 1743516a2dSKris Buschelman 1843516a2dSKris Buschelman Collective on Mat 1943516a2dSKris Buschelman 2043516a2dSKris Buschelman Input Parameters: 2143516a2dSKris Buschelman . mat - the matrix 2243516a2dSKris Buschelman 2343516a2dSKris Buschelman Level: advanced 2443516a2dSKris Buschelman @*/ 2546129b97SKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 2643516a2dSKris Buschelman { 2743516a2dSKris Buschelman PetscErrorCode ierr,(*f)(Mat); 2843516a2dSKris Buschelman 2943516a2dSKris Buschelman PetscFunctionBegin; 3043516a2dSKris Buschelman PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 3143516a2dSKris Buschelman if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3243516a2dSKris Buschelman if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3343516a2dSKris Buschelman 3443516a2dSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 3543516a2dSKris Buschelman if (f) { 3643516a2dSKris Buschelman ierr = (*f)(mat);CHKERRQ(ierr); 3743516a2dSKris Buschelman } else { 3843516a2dSKris Buschelman SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 3943516a2dSKris Buschelman } 4043516a2dSKris Buschelman PetscFunctionReturn(0); 4143516a2dSKris Buschelman } 4243516a2dSKris Buschelman 4343516a2dSKris Buschelman EXTERN_C_BEGIN 4443516a2dSKris Buschelman #undef __FUNCT__ 45b01c7715SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 4646129b97SKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47b01c7715SBarry Smith { 48b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 496849ba73SBarry Smith PetscErrorCode ierr; 50899cda47SBarry Smith PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51b01c7715SBarry Smith PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52b01c7715SBarry Smith 53b01c7715SBarry Smith PetscFunctionBegin; 54b01c7715SBarry Smith if (a->idiagvalid) PetscFunctionReturn(0); 55b01c7715SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56b01c7715SBarry Smith diag_offset = a->diag; 57b01c7715SBarry Smith if (!a->idiag) { 58b01c7715SBarry Smith ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59b01c7715SBarry Smith } 60b01c7715SBarry Smith diag = a->idiag; 61b01c7715SBarry Smith mdiag = a->idiag+bs*bs*mbs; 62b01c7715SBarry Smith /* factor and invert each block */ 63521d7252SBarry Smith switch (bs){ 64b01c7715SBarry Smith case 2: 65b01c7715SBarry Smith for (i=0; i<mbs; i++) { 66b01c7715SBarry Smith odiag = v + 4*diag_offset[i]; 67b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70b01c7715SBarry Smith diag += 4; 71b01c7715SBarry Smith mdiag += 4; 72b01c7715SBarry Smith } 73b01c7715SBarry Smith break; 74b01c7715SBarry Smith case 3: 75b01c7715SBarry Smith for (i=0; i<mbs; i++) { 76b01c7715SBarry Smith odiag = v + 9*diag_offset[i]; 77b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78b01c7715SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79b01c7715SBarry Smith diag[8] = odiag[8]; 80b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81b01c7715SBarry Smith mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82b01c7715SBarry Smith mdiag[8] = odiag[8]; 83b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84b01c7715SBarry Smith diag += 9; 85b01c7715SBarry Smith mdiag += 9; 86b01c7715SBarry Smith } 87b01c7715SBarry Smith break; 88b01c7715SBarry Smith case 4: 89b01c7715SBarry Smith for (i=0; i<mbs; i++) { 90b01c7715SBarry Smith odiag = v + 16*diag_offset[i]; 91b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94b01c7715SBarry Smith diag += 16; 95b01c7715SBarry Smith mdiag += 16; 96b01c7715SBarry Smith } 97b01c7715SBarry Smith break; 98b01c7715SBarry Smith case 5: 99b01c7715SBarry Smith for (i=0; i<mbs; i++) { 100b01c7715SBarry Smith odiag = v + 25*diag_offset[i]; 101b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104b01c7715SBarry Smith diag += 25; 105b01c7715SBarry Smith mdiag += 25; 106b01c7715SBarry Smith } 107b01c7715SBarry Smith break; 108b01c7715SBarry Smith default: 109521d7252SBarry Smith SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110b01c7715SBarry Smith } 111b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 112b01c7715SBarry Smith PetscFunctionReturn(0); 113b01c7715SBarry Smith } 11443516a2dSKris Buschelman EXTERN_C_END 115b01c7715SBarry Smith 116b01c7715SBarry Smith #undef __FUNCT__ 117*6d3beeddSMatthew Knepley #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 118*6d3beeddSMatthew Knepley PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119*6d3beeddSMatthew Knepley { 120*6d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121*6d3beeddSMatthew Knepley PetscScalar *x,x1,s1; 122*6d3beeddSMatthew Knepley const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123*6d3beeddSMatthew Knepley PetscErrorCode ierr; 124*6d3beeddSMatthew Knepley PetscInt m = a->mbs,i,i2,nz,idx; 125*6d3beeddSMatthew Knepley const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126*6d3beeddSMatthew Knepley 127*6d3beeddSMatthew Knepley PetscFunctionBegin; 128*6d3beeddSMatthew Knepley if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129*6d3beeddSMatthew Knepley its = its*lits; 130*6d3beeddSMatthew Knepley if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131*6d3beeddSMatthew Knepley if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132*6d3beeddSMatthew Knepley if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133*6d3beeddSMatthew Knepley if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 134*6d3beeddSMatthew Knepley if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135*6d3beeddSMatthew Knepley 136*6d3beeddSMatthew Knepley if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137*6d3beeddSMatthew Knepley 138*6d3beeddSMatthew Knepley diag = a->diag; 139*6d3beeddSMatthew Knepley idiag = a->idiag; 140*6d3beeddSMatthew Knepley ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141*6d3beeddSMatthew Knepley ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142*6d3beeddSMatthew Knepley 143*6d3beeddSMatthew Knepley if (flag & SOR_ZERO_INITIAL_GUESS) { 144*6d3beeddSMatthew Knepley if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145*6d3beeddSMatthew Knepley x[0] = b[0]*idiag[0]; 146*6d3beeddSMatthew Knepley i2 = 1; 147*6d3beeddSMatthew Knepley idiag += 1; 148*6d3beeddSMatthew Knepley for (i=1; i<m; i++) { 149*6d3beeddSMatthew Knepley v = aa + ai[i]; 150*6d3beeddSMatthew Knepley vi = aj + ai[i]; 151*6d3beeddSMatthew Knepley nz = diag[i] - ai[i]; 152*6d3beeddSMatthew Knepley s1 = b[i2]; 153*6d3beeddSMatthew Knepley while (nz--) { 154*6d3beeddSMatthew Knepley idx = (*vi++); 155*6d3beeddSMatthew Knepley x1 = x[idx]; 156*6d3beeddSMatthew Knepley s1 -= v[0]*x1; 157*6d3beeddSMatthew Knepley v += 1; 158*6d3beeddSMatthew Knepley } 159*6d3beeddSMatthew Knepley x[i2] = idiag[0]*s1; 160*6d3beeddSMatthew Knepley idiag += 1; 161*6d3beeddSMatthew Knepley i2 += 1; 162*6d3beeddSMatthew Knepley } 163*6d3beeddSMatthew Knepley /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 164*6d3beeddSMatthew Knepley ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 165*6d3beeddSMatthew Knepley } 166*6d3beeddSMatthew Knepley if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 167*6d3beeddSMatthew Knepley (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 168*6d3beeddSMatthew Knepley i2 = 0; 169*6d3beeddSMatthew Knepley mdiag = a->idiag+a->mbs; 170*6d3beeddSMatthew Knepley for (i=0; i<m; i++) { 171*6d3beeddSMatthew Knepley x1 = x[i2]; 172*6d3beeddSMatthew Knepley x[i2] = mdiag[0]*x1; 173*6d3beeddSMatthew Knepley mdiag += 1; 174*6d3beeddSMatthew Knepley i2 += 1; 175*6d3beeddSMatthew Knepley } 176*6d3beeddSMatthew Knepley ierr = PetscLogFlops(m);CHKERRQ(ierr); 177*6d3beeddSMatthew Knepley } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 178*6d3beeddSMatthew Knepley ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 179*6d3beeddSMatthew Knepley } 180*6d3beeddSMatthew Knepley if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 181*6d3beeddSMatthew Knepley idiag = a->idiag+a->mbs - 1; 182*6d3beeddSMatthew Knepley i2 = m - 1; 183*6d3beeddSMatthew Knepley x1 = x[i2]; 184*6d3beeddSMatthew Knepley x[i2] = idiag[0]*x1; 185*6d3beeddSMatthew Knepley idiag -= 1; 186*6d3beeddSMatthew Knepley i2 -= 1; 187*6d3beeddSMatthew Knepley for (i=m-2; i>=0; i--) { 188*6d3beeddSMatthew Knepley v = aa + (diag[i]+1); 189*6d3beeddSMatthew Knepley vi = aj + diag[i] + 1; 190*6d3beeddSMatthew Knepley nz = ai[i+1] - diag[i] - 1; 191*6d3beeddSMatthew Knepley s1 = x[i2]; 192*6d3beeddSMatthew Knepley while (nz--) { 193*6d3beeddSMatthew Knepley idx = (*vi++); 194*6d3beeddSMatthew Knepley x1 = x[idx]; 195*6d3beeddSMatthew Knepley s1 -= v[0]*x1; 196*6d3beeddSMatthew Knepley v += 1; 197*6d3beeddSMatthew Knepley } 198*6d3beeddSMatthew Knepley x[i2] = idiag[0]*s1; 199*6d3beeddSMatthew Knepley idiag -= 2; 200*6d3beeddSMatthew Knepley i2 -= 1; 201*6d3beeddSMatthew Knepley } 202*6d3beeddSMatthew Knepley ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 203*6d3beeddSMatthew Knepley } 204*6d3beeddSMatthew Knepley } else { 205*6d3beeddSMatthew Knepley SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 206*6d3beeddSMatthew Knepley } 207*6d3beeddSMatthew Knepley ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208*6d3beeddSMatthew Knepley ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209*6d3beeddSMatthew Knepley PetscFunctionReturn(0); 210*6d3beeddSMatthew Knepley } 211*6d3beeddSMatthew Knepley 212*6d3beeddSMatthew Knepley #undef __FUNCT__ 213b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 214c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 215b01c7715SBarry Smith { 216b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 217b01c7715SBarry Smith PetscScalar *x,x1,x2,s1,s2; 218b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 219dfbe8321SBarry Smith PetscErrorCode ierr; 220c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 221c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 222b01c7715SBarry Smith 223b01c7715SBarry Smith PetscFunctionBegin; 22451f519a2SBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 225b01c7715SBarry Smith its = its*lits; 22677431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 227b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 228b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 22971f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 230b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 231b01c7715SBarry Smith 232b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 233b01c7715SBarry Smith 234b01c7715SBarry Smith diag = a->diag; 235b01c7715SBarry Smith idiag = a->idiag; 2361ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2371ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 238b01c7715SBarry Smith 239b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 240b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 241b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 242b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 243b01c7715SBarry Smith i2 = 2; 244b01c7715SBarry Smith idiag += 4; 245b01c7715SBarry Smith for (i=1; i<m; i++) { 246b01c7715SBarry Smith v = aa + 4*ai[i]; 247b01c7715SBarry Smith vi = aj + ai[i]; 248b01c7715SBarry Smith nz = diag[i] - ai[i]; 249b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; 250b01c7715SBarry Smith while (nz--) { 251b01c7715SBarry Smith idx = 2*(*vi++); 252b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 253b01c7715SBarry Smith s1 -= v[0]*x1 + v[2]*x2; 254b01c7715SBarry Smith s2 -= v[1]*x1 + v[3]*x2; 255b01c7715SBarry Smith v += 4; 256b01c7715SBarry Smith } 257b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[2]*s2; 258b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 259b01c7715SBarry Smith idiag += 4; 260b01c7715SBarry Smith i2 += 2; 261b01c7715SBarry Smith } 262b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 263efee365bSSatish Balay ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 264b01c7715SBarry Smith } 265b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 266b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 267b01c7715SBarry Smith i2 = 0; 268b01c7715SBarry Smith mdiag = a->idiag+4*a->mbs; 269b01c7715SBarry Smith for (i=0; i<m; i++) { 270b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; 271b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 272b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 273b01c7715SBarry Smith mdiag += 4; 274b01c7715SBarry Smith i2 += 2; 275b01c7715SBarry Smith } 276efee365bSSatish Balay ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 277b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 278899cda47SBarry Smith ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 279b01c7715SBarry Smith } 280b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 281b01c7715SBarry Smith idiag = a->idiag+4*a->mbs - 4; 282b01c7715SBarry Smith i2 = 2*m - 2; 283b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; 284b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[2]*x2; 285b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 286b01c7715SBarry Smith idiag -= 4; 287b01c7715SBarry Smith i2 -= 2; 288b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 289b01c7715SBarry Smith v = aa + 4*(diag[i]+1); 290b01c7715SBarry Smith vi = aj + diag[i] + 1; 291b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 292b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; 293b01c7715SBarry Smith while (nz--) { 294b01c7715SBarry Smith idx = 2*(*vi++); 295b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 296b01c7715SBarry Smith s1 -= v[0]*x1 + v[2]*x2; 297b01c7715SBarry Smith s2 -= v[1]*x1 + v[3]*x2; 298b01c7715SBarry Smith v += 4; 299b01c7715SBarry Smith } 300b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[2]*s2; 301b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 302b01c7715SBarry Smith idiag -= 4; 303b01c7715SBarry Smith i2 -= 2; 304b01c7715SBarry Smith } 305efee365bSSatish Balay ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 306b01c7715SBarry Smith } 307b01c7715SBarry Smith } else { 308634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 309b01c7715SBarry Smith } 3101ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3111ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 312b01c7715SBarry Smith PetscFunctionReturn(0); 313b01c7715SBarry Smith } 314b01c7715SBarry Smith 315b01c7715SBarry Smith #undef __FUNCT__ 316b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 317c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 318b01c7715SBarry Smith { 319b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 320b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,s1,s2,s3; 321b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 322dfbe8321SBarry Smith PetscErrorCode ierr; 323c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 324c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 325b01c7715SBarry Smith 326b01c7715SBarry Smith PetscFunctionBegin; 327b01c7715SBarry Smith its = its*lits; 32871f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 32977431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 330b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 331b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 33271f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 333b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 334b01c7715SBarry Smith 335b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 336b01c7715SBarry Smith 337b01c7715SBarry Smith diag = a->diag; 338b01c7715SBarry Smith idiag = a->idiag; 3391ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3401ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 341b01c7715SBarry Smith 342b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 343b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 344b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 345b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 346b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 347b01c7715SBarry Smith i2 = 3; 348b01c7715SBarry Smith idiag += 9; 349b01c7715SBarry Smith for (i=1; i<m; i++) { 350b01c7715SBarry Smith v = aa + 9*ai[i]; 351b01c7715SBarry Smith vi = aj + ai[i]; 352b01c7715SBarry Smith nz = diag[i] - ai[i]; 353b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 354b01c7715SBarry Smith while (nz--) { 355b01c7715SBarry Smith idx = 3*(*vi++); 356b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 357b01c7715SBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 358b01c7715SBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 359b01c7715SBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 360b01c7715SBarry Smith v += 9; 361b01c7715SBarry Smith } 362b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 363b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 364b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 365b01c7715SBarry Smith idiag += 9; 366b01c7715SBarry Smith i2 += 3; 367b01c7715SBarry Smith } 368b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 369efee365bSSatish Balay ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 370b01c7715SBarry Smith } 371b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 372b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 373b01c7715SBarry Smith i2 = 0; 374b01c7715SBarry Smith mdiag = a->idiag+9*a->mbs; 375b01c7715SBarry Smith for (i=0; i<m; i++) { 376b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 377b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 378b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 379b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 380b01c7715SBarry Smith mdiag += 9; 381b01c7715SBarry Smith i2 += 3; 382b01c7715SBarry Smith } 383efee365bSSatish Balay ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 384b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 385899cda47SBarry Smith ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 386b01c7715SBarry Smith } 387b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 388b01c7715SBarry Smith idiag = a->idiag+9*a->mbs - 9; 389b01c7715SBarry Smith i2 = 3*m - 3; 390b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 391b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 392b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 393b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 394b01c7715SBarry Smith idiag -= 9; 395b01c7715SBarry Smith i2 -= 3; 396b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 397b01c7715SBarry Smith v = aa + 9*(diag[i]+1); 398b01c7715SBarry Smith vi = aj + diag[i] + 1; 399b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 400b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 401b01c7715SBarry Smith while (nz--) { 402b01c7715SBarry Smith idx = 3*(*vi++); 403b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 404b01c7715SBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 405b01c7715SBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 406b01c7715SBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 407b01c7715SBarry Smith v += 9; 408b01c7715SBarry Smith } 409b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 410b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 411b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 412b01c7715SBarry Smith idiag -= 9; 413b01c7715SBarry Smith i2 -= 3; 414b01c7715SBarry Smith } 415efee365bSSatish Balay ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 416b01c7715SBarry Smith } 417b01c7715SBarry Smith } else { 418634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 419b01c7715SBarry Smith } 4201ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4211ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 422b01c7715SBarry Smith PetscFunctionReturn(0); 423b01c7715SBarry Smith } 424b01c7715SBarry Smith 425b01c7715SBarry Smith #undef __FUNCT__ 426b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 427c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 428b01c7715SBarry Smith { 429b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 430b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 431b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 432dfbe8321SBarry Smith PetscErrorCode ierr; 433c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 434c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 435b01c7715SBarry Smith 436b01c7715SBarry Smith PetscFunctionBegin; 43771f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 438b01c7715SBarry Smith its = its*lits; 43977431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 440b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 441b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 44271f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 443b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 444b01c7715SBarry Smith 445b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 446b01c7715SBarry Smith 447b01c7715SBarry Smith diag = a->diag; 448b01c7715SBarry Smith idiag = a->idiag; 4491ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4501ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 451b01c7715SBarry Smith 452b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 453b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 454b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 455b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 456b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 457b01c7715SBarry Smith x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 458b01c7715SBarry Smith i2 = 4; 459b01c7715SBarry Smith idiag += 16; 460b01c7715SBarry Smith for (i=1; i<m; i++) { 461b01c7715SBarry Smith v = aa + 16*ai[i]; 462b01c7715SBarry Smith vi = aj + ai[i]; 463b01c7715SBarry Smith nz = diag[i] - ai[i]; 464b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 465b01c7715SBarry Smith while (nz--) { 466b01c7715SBarry Smith idx = 4*(*vi++); 467b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 468b01c7715SBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 469b01c7715SBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 470b01c7715SBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 471b01c7715SBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 472b01c7715SBarry Smith v += 16; 473b01c7715SBarry Smith } 474b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 475b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 476b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 477b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 478b01c7715SBarry Smith idiag += 16; 479b01c7715SBarry Smith i2 += 4; 480b01c7715SBarry Smith } 481b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 482efee365bSSatish Balay ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 483b01c7715SBarry Smith } 484b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 485b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 486b01c7715SBarry Smith i2 = 0; 487b01c7715SBarry Smith mdiag = a->idiag+16*a->mbs; 488b01c7715SBarry Smith for (i=0; i<m; i++) { 489b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 490b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 491b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 492b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 493b01c7715SBarry Smith x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 494b01c7715SBarry Smith mdiag += 16; 495b01c7715SBarry Smith i2 += 4; 496b01c7715SBarry Smith } 497efee365bSSatish Balay ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 498b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 499899cda47SBarry Smith ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 500b01c7715SBarry Smith } 501b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 502b01c7715SBarry Smith idiag = a->idiag+16*a->mbs - 16; 503b01c7715SBarry Smith i2 = 4*m - 4; 504b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 505b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 506b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 507b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 508b01c7715SBarry Smith x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 509b01c7715SBarry Smith idiag -= 16; 510b01c7715SBarry Smith i2 -= 4; 511b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 512b01c7715SBarry Smith v = aa + 16*(diag[i]+1); 513b01c7715SBarry Smith vi = aj + diag[i] + 1; 514b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 515b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 516b01c7715SBarry Smith while (nz--) { 517b01c7715SBarry Smith idx = 4*(*vi++); 518b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 519b01c7715SBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 520b01c7715SBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 521b01c7715SBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 522b01c7715SBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 523b01c7715SBarry Smith v += 16; 524b01c7715SBarry Smith } 525b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 526b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 527b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 528b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 529b01c7715SBarry Smith idiag -= 16; 530b01c7715SBarry Smith i2 -= 4; 531b01c7715SBarry Smith } 532efee365bSSatish Balay ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 533b01c7715SBarry Smith } 534b01c7715SBarry Smith } else { 535634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 536b01c7715SBarry Smith } 5371ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5381ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 539b01c7715SBarry Smith PetscFunctionReturn(0); 540b01c7715SBarry Smith } 541b01c7715SBarry Smith 542b01c7715SBarry Smith #undef __FUNCT__ 543b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 544c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 545b01c7715SBarry Smith { 546b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 547b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 548b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 549dfbe8321SBarry Smith PetscErrorCode ierr; 550c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 551c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 552b01c7715SBarry Smith 553b01c7715SBarry Smith PetscFunctionBegin; 55471f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 555b01c7715SBarry Smith its = its*lits; 55677431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 557b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 558b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 55971f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 560b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 561b01c7715SBarry Smith 562b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 563b01c7715SBarry Smith 564b01c7715SBarry Smith diag = a->diag; 565b01c7715SBarry Smith idiag = a->idiag; 5661ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5671ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 568b01c7715SBarry Smith 569b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 570b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 571b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 572b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 573b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 574b01c7715SBarry Smith x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 575b01c7715SBarry Smith x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 576b01c7715SBarry Smith i2 = 5; 577b01c7715SBarry Smith idiag += 25; 578b01c7715SBarry Smith for (i=1; i<m; i++) { 579b01c7715SBarry Smith v = aa + 25*ai[i]; 580b01c7715SBarry Smith vi = aj + ai[i]; 581b01c7715SBarry Smith nz = diag[i] - ai[i]; 582b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 583b01c7715SBarry Smith while (nz--) { 584b01c7715SBarry Smith idx = 5*(*vi++); 585b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 586b01c7715SBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 587b01c7715SBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 588b01c7715SBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 589b01c7715SBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 590b01c7715SBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 591b01c7715SBarry Smith v += 25; 592b01c7715SBarry Smith } 593b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 594b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 595b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 596b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 597b01c7715SBarry Smith x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 598b01c7715SBarry Smith idiag += 25; 599b01c7715SBarry Smith i2 += 5; 600b01c7715SBarry Smith } 601b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 602efee365bSSatish Balay ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 603b01c7715SBarry Smith } 604b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 605b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 606b01c7715SBarry Smith i2 = 0; 607b01c7715SBarry Smith mdiag = a->idiag+25*a->mbs; 608b01c7715SBarry Smith for (i=0; i<m; i++) { 609b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 610b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 611b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 612b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 613b01c7715SBarry Smith x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 614b01c7715SBarry Smith x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 615b01c7715SBarry Smith mdiag += 25; 616b01c7715SBarry Smith i2 += 5; 617b01c7715SBarry Smith } 618efee365bSSatish Balay ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 619b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 620899cda47SBarry Smith ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 621b01c7715SBarry Smith } 622b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 623b01c7715SBarry Smith idiag = a->idiag+25*a->mbs - 25; 624b01c7715SBarry Smith i2 = 5*m - 5; 625b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 626b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 627b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 628b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 629b01c7715SBarry Smith x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 630b01c7715SBarry Smith x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 631b01c7715SBarry Smith idiag -= 25; 632b01c7715SBarry Smith i2 -= 5; 633b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 634b01c7715SBarry Smith v = aa + 25*(diag[i]+1); 635b01c7715SBarry Smith vi = aj + diag[i] + 1; 636b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 637b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 638b01c7715SBarry Smith while (nz--) { 639b01c7715SBarry Smith idx = 5*(*vi++); 640b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 641b01c7715SBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 642b01c7715SBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 643b01c7715SBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 644b01c7715SBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 645b01c7715SBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 646b01c7715SBarry Smith v += 25; 647b01c7715SBarry Smith } 648b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 649b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 650b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 651b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 652b01c7715SBarry Smith x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 653b01c7715SBarry Smith idiag -= 25; 654b01c7715SBarry Smith i2 -= 5; 655b01c7715SBarry Smith } 656efee365bSSatish Balay ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 657b01c7715SBarry Smith } 658b01c7715SBarry Smith } else { 659634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 660b01c7715SBarry Smith } 6611ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6621ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 663b01c7715SBarry Smith PetscFunctionReturn(0); 664b01c7715SBarry Smith } 665b01c7715SBarry Smith 666*6d3beeddSMatthew Knepley #undef __FUNCT__ 667*6d3beeddSMatthew Knepley #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 668*6d3beeddSMatthew Knepley PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 669*6d3beeddSMatthew Knepley { 670*6d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 671*6d3beeddSMatthew Knepley PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 672*6d3beeddSMatthew Knepley const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 673*6d3beeddSMatthew Knepley PetscErrorCode ierr; 674*6d3beeddSMatthew Knepley PetscInt m = a->mbs,i,i2,nz,idx; 675*6d3beeddSMatthew Knepley const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 676*6d3beeddSMatthew Knepley 677*6d3beeddSMatthew Knepley PetscFunctionBegin; 678*6d3beeddSMatthew Knepley if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 679*6d3beeddSMatthew Knepley its = its*lits; 680*6d3beeddSMatthew Knepley if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 681*6d3beeddSMatthew Knepley if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 682*6d3beeddSMatthew Knepley if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 683*6d3beeddSMatthew Knepley if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 684*6d3beeddSMatthew Knepley if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 685*6d3beeddSMatthew Knepley 686*6d3beeddSMatthew Knepley if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 687*6d3beeddSMatthew Knepley 688*6d3beeddSMatthew Knepley diag = a->diag; 689*6d3beeddSMatthew Knepley idiag = a->idiag; 690*6d3beeddSMatthew Knepley ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 691*6d3beeddSMatthew Knepley ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 692*6d3beeddSMatthew Knepley 693*6d3beeddSMatthew Knepley if (flag & SOR_ZERO_INITIAL_GUESS) { 694*6d3beeddSMatthew Knepley if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 695*6d3beeddSMatthew Knepley x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 696*6d3beeddSMatthew Knepley x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 697*6d3beeddSMatthew Knepley x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 698*6d3beeddSMatthew Knepley x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 699*6d3beeddSMatthew Knepley x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 700*6d3beeddSMatthew Knepley x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 701*6d3beeddSMatthew Knepley i2 = 6; 702*6d3beeddSMatthew Knepley idiag += 36; 703*6d3beeddSMatthew Knepley for (i=1; i<m; i++) { 704*6d3beeddSMatthew Knepley v = aa + 36*ai[i]; 705*6d3beeddSMatthew Knepley vi = aj + ai[i]; 706*6d3beeddSMatthew Knepley nz = diag[i] - ai[i]; 707*6d3beeddSMatthew Knepley s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 708*6d3beeddSMatthew Knepley while (nz--) { 709*6d3beeddSMatthew Knepley idx = 6*(*vi++); 710*6d3beeddSMatthew Knepley x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 711*6d3beeddSMatthew Knepley s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 712*6d3beeddSMatthew Knepley s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 713*6d3beeddSMatthew Knepley s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 714*6d3beeddSMatthew Knepley s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 715*6d3beeddSMatthew Knepley s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 716*6d3beeddSMatthew Knepley s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 717*6d3beeddSMatthew Knepley v += 36; 718*6d3beeddSMatthew Knepley } 719*6d3beeddSMatthew Knepley x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 720*6d3beeddSMatthew Knepley x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 721*6d3beeddSMatthew Knepley x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 722*6d3beeddSMatthew Knepley x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 723*6d3beeddSMatthew Knepley x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 724*6d3beeddSMatthew Knepley x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 725*6d3beeddSMatthew Knepley idiag += 36; 726*6d3beeddSMatthew Knepley i2 += 6; 727*6d3beeddSMatthew Knepley } 728*6d3beeddSMatthew Knepley /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 729*6d3beeddSMatthew Knepley ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 730*6d3beeddSMatthew Knepley } 731*6d3beeddSMatthew Knepley if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 732*6d3beeddSMatthew Knepley (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 733*6d3beeddSMatthew Knepley i2 = 0; 734*6d3beeddSMatthew Knepley mdiag = a->idiag+36*a->mbs; 735*6d3beeddSMatthew Knepley for (i=0; i<m; i++) { 736*6d3beeddSMatthew Knepley x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 737*6d3beeddSMatthew Knepley x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 738*6d3beeddSMatthew Knepley x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 739*6d3beeddSMatthew Knepley x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 740*6d3beeddSMatthew Knepley x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 741*6d3beeddSMatthew Knepley x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 742*6d3beeddSMatthew Knepley x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 743*6d3beeddSMatthew Knepley mdiag += 36; 744*6d3beeddSMatthew Knepley i2 += 6; 745*6d3beeddSMatthew Knepley } 746*6d3beeddSMatthew Knepley ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 747*6d3beeddSMatthew Knepley } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 748*6d3beeddSMatthew Knepley ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 749*6d3beeddSMatthew Knepley } 750*6d3beeddSMatthew Knepley if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 751*6d3beeddSMatthew Knepley idiag = a->idiag+36*a->mbs - 36; 752*6d3beeddSMatthew Knepley i2 = 6*m - 6; 753*6d3beeddSMatthew Knepley x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 754*6d3beeddSMatthew Knepley x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 755*6d3beeddSMatthew Knepley x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 756*6d3beeddSMatthew Knepley x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 757*6d3beeddSMatthew Knepley x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 758*6d3beeddSMatthew Knepley x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 759*6d3beeddSMatthew Knepley x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 760*6d3beeddSMatthew Knepley idiag -= 36; 761*6d3beeddSMatthew Knepley i2 -= 6; 762*6d3beeddSMatthew Knepley for (i=m-2; i>=0; i--) { 763*6d3beeddSMatthew Knepley v = aa + 36*(diag[i]+1); 764*6d3beeddSMatthew Knepley vi = aj + diag[i] + 1; 765*6d3beeddSMatthew Knepley nz = ai[i+1] - diag[i] - 1; 766*6d3beeddSMatthew Knepley s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 767*6d3beeddSMatthew Knepley while (nz--) { 768*6d3beeddSMatthew Knepley idx = 6*(*vi++); 769*6d3beeddSMatthew Knepley x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 770*6d3beeddSMatthew Knepley s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 771*6d3beeddSMatthew Knepley s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 772*6d3beeddSMatthew Knepley s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 773*6d3beeddSMatthew Knepley s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 774*6d3beeddSMatthew Knepley s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 775*6d3beeddSMatthew Knepley s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 776*6d3beeddSMatthew Knepley v += 36; 777*6d3beeddSMatthew Knepley } 778*6d3beeddSMatthew Knepley x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 779*6d3beeddSMatthew Knepley x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 780*6d3beeddSMatthew Knepley x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 781*6d3beeddSMatthew Knepley x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 782*6d3beeddSMatthew Knepley x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 783*6d3beeddSMatthew Knepley x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 784*6d3beeddSMatthew Knepley idiag -= 36; 785*6d3beeddSMatthew Knepley i2 -= 6; 786*6d3beeddSMatthew Knepley } 787*6d3beeddSMatthew Knepley ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 788*6d3beeddSMatthew Knepley } 789*6d3beeddSMatthew Knepley } else { 790*6d3beeddSMatthew Knepley SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 791*6d3beeddSMatthew Knepley } 792*6d3beeddSMatthew Knepley ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 793*6d3beeddSMatthew Knepley ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 794*6d3beeddSMatthew Knepley PetscFunctionReturn(0); 795*6d3beeddSMatthew Knepley } 796*6d3beeddSMatthew Knepley 797*6d3beeddSMatthew Knepley #undef __FUNCT__ 798*6d3beeddSMatthew Knepley #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 799*6d3beeddSMatthew Knepley PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 800*6d3beeddSMatthew Knepley { 801*6d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 802*6d3beeddSMatthew Knepley PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 803*6d3beeddSMatthew Knepley const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 804*6d3beeddSMatthew Knepley PetscErrorCode ierr; 805*6d3beeddSMatthew Knepley PetscInt m = a->mbs,i,i2,nz,idx; 806*6d3beeddSMatthew Knepley const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 807*6d3beeddSMatthew Knepley 808*6d3beeddSMatthew Knepley PetscFunctionBegin; 809*6d3beeddSMatthew Knepley if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 810*6d3beeddSMatthew Knepley its = its*lits; 811*6d3beeddSMatthew Knepley if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 812*6d3beeddSMatthew Knepley if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 813*6d3beeddSMatthew Knepley if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 814*6d3beeddSMatthew Knepley if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 815*6d3beeddSMatthew Knepley if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 816*6d3beeddSMatthew Knepley 817*6d3beeddSMatthew Knepley if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 818*6d3beeddSMatthew Knepley 819*6d3beeddSMatthew Knepley diag = a->diag; 820*6d3beeddSMatthew Knepley idiag = a->idiag; 821*6d3beeddSMatthew Knepley ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822*6d3beeddSMatthew Knepley ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 823*6d3beeddSMatthew Knepley 824*6d3beeddSMatthew Knepley if (flag & SOR_ZERO_INITIAL_GUESS) { 825*6d3beeddSMatthew Knepley if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 826*6d3beeddSMatthew Knepley x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 827*6d3beeddSMatthew Knepley x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 828*6d3beeddSMatthew Knepley x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 829*6d3beeddSMatthew Knepley x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 830*6d3beeddSMatthew Knepley x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 831*6d3beeddSMatthew Knepley x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 832*6d3beeddSMatthew Knepley x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 833*6d3beeddSMatthew Knepley i2 = 7; 834*6d3beeddSMatthew Knepley idiag += 49; 835*6d3beeddSMatthew Knepley for (i=1; i<m; i++) { 836*6d3beeddSMatthew Knepley v = aa + 49*ai[i]; 837*6d3beeddSMatthew Knepley vi = aj + ai[i]; 838*6d3beeddSMatthew Knepley nz = diag[i] - ai[i]; 839*6d3beeddSMatthew Knepley s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 840*6d3beeddSMatthew Knepley while (nz--) { 841*6d3beeddSMatthew Knepley idx = 7*(*vi++); 842*6d3beeddSMatthew Knepley x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 843*6d3beeddSMatthew Knepley s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 844*6d3beeddSMatthew Knepley s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 845*6d3beeddSMatthew Knepley s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 846*6d3beeddSMatthew Knepley s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 847*6d3beeddSMatthew Knepley s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 848*6d3beeddSMatthew Knepley s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 849*6d3beeddSMatthew Knepley s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 850*6d3beeddSMatthew Knepley v += 49; 851*6d3beeddSMatthew Knepley } 852*6d3beeddSMatthew Knepley x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 853*6d3beeddSMatthew Knepley x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 854*6d3beeddSMatthew Knepley x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 855*6d3beeddSMatthew Knepley x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 856*6d3beeddSMatthew Knepley x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 857*6d3beeddSMatthew Knepley x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 858*6d3beeddSMatthew Knepley x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 859*6d3beeddSMatthew Knepley idiag += 49; 860*6d3beeddSMatthew Knepley i2 += 7; 861*6d3beeddSMatthew Knepley } 862*6d3beeddSMatthew Knepley /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 863*6d3beeddSMatthew Knepley ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 864*6d3beeddSMatthew Knepley } 865*6d3beeddSMatthew Knepley if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 866*6d3beeddSMatthew Knepley (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 867*6d3beeddSMatthew Knepley i2 = 0; 868*6d3beeddSMatthew Knepley mdiag = a->idiag+49*a->mbs; 869*6d3beeddSMatthew Knepley for (i=0; i<m; i++) { 870*6d3beeddSMatthew Knepley x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 871*6d3beeddSMatthew Knepley x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 872*6d3beeddSMatthew Knepley x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 873*6d3beeddSMatthew Knepley x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 874*6d3beeddSMatthew Knepley x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 875*6d3beeddSMatthew Knepley x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 876*6d3beeddSMatthew Knepley x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 877*6d3beeddSMatthew Knepley x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 878*6d3beeddSMatthew Knepley mdiag += 36; 879*6d3beeddSMatthew Knepley i2 += 6; 880*6d3beeddSMatthew Knepley } 881*6d3beeddSMatthew Knepley ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 882*6d3beeddSMatthew Knepley } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 883*6d3beeddSMatthew Knepley ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 884*6d3beeddSMatthew Knepley } 885*6d3beeddSMatthew Knepley if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 886*6d3beeddSMatthew Knepley idiag = a->idiag+49*a->mbs - 49; 887*6d3beeddSMatthew Knepley i2 = 7*m - 7; 888*6d3beeddSMatthew Knepley x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 889*6d3beeddSMatthew Knepley x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 890*6d3beeddSMatthew Knepley x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 891*6d3beeddSMatthew Knepley x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 892*6d3beeddSMatthew Knepley x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 893*6d3beeddSMatthew Knepley x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 894*6d3beeddSMatthew Knepley x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 895*6d3beeddSMatthew Knepley x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 896*6d3beeddSMatthew Knepley idiag -= 49; 897*6d3beeddSMatthew Knepley i2 -= 7; 898*6d3beeddSMatthew Knepley for (i=m-2; i>=0; i--) { 899*6d3beeddSMatthew Knepley v = aa + 49*(diag[i]+1); 900*6d3beeddSMatthew Knepley vi = aj + diag[i] + 1; 901*6d3beeddSMatthew Knepley nz = ai[i+1] - diag[i] - 1; 902*6d3beeddSMatthew Knepley s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 903*6d3beeddSMatthew Knepley while (nz--) { 904*6d3beeddSMatthew Knepley idx = 7*(*vi++); 905*6d3beeddSMatthew Knepley x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 906*6d3beeddSMatthew Knepley s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 907*6d3beeddSMatthew Knepley s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 908*6d3beeddSMatthew Knepley s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 909*6d3beeddSMatthew Knepley s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 910*6d3beeddSMatthew Knepley s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 911*6d3beeddSMatthew Knepley s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 912*6d3beeddSMatthew Knepley s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 913*6d3beeddSMatthew Knepley v += 49; 914*6d3beeddSMatthew Knepley } 915*6d3beeddSMatthew Knepley x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 916*6d3beeddSMatthew Knepley x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 917*6d3beeddSMatthew Knepley x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 918*6d3beeddSMatthew Knepley x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 919*6d3beeddSMatthew Knepley x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 920*6d3beeddSMatthew Knepley x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 921*6d3beeddSMatthew Knepley x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 922*6d3beeddSMatthew Knepley idiag -= 49; 923*6d3beeddSMatthew Knepley i2 -= 7; 924*6d3beeddSMatthew Knepley } 925*6d3beeddSMatthew Knepley ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 926*6d3beeddSMatthew Knepley } 927*6d3beeddSMatthew Knepley } else { 928*6d3beeddSMatthew Knepley SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 929*6d3beeddSMatthew Knepley } 930*6d3beeddSMatthew Knepley ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 931*6d3beeddSMatthew Knepley ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 932*6d3beeddSMatthew Knepley PetscFunctionReturn(0); 933*6d3beeddSMatthew Knepley } 934*6d3beeddSMatthew Knepley 935af674e45SBarry Smith /* 93681824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 937af674e45SBarry Smith */ 938af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 939af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 940af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 941af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 942af674e45SBarry Smith #endif 943af674e45SBarry Smith 9449c8c1041SBarry Smith EXTERN_C_BEGIN 945af674e45SBarry Smith #undef __FUNCT__ 946af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 947be1d678aSKris Buschelman void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 948af674e45SBarry Smith { 949af674e45SBarry Smith Mat A = *AA; 950af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 951c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 952c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 95317ec6a02SBarry Smith PetscInt *aj=a->j,stepval,lastcol = -1; 954f15d580aSBarry Smith const PetscScalar *value = v; 9554bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 956af674e45SBarry Smith 957af674e45SBarry Smith PetscFunctionBegin; 9587adad957SLisandro Dalcin if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 959af674e45SBarry Smith stepval = (n-1)*4; 960af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 961af674e45SBarry Smith row = im[k]; 962af674e45SBarry Smith rp = aj + ai[row]; 963af674e45SBarry Smith ap = aa + 16*ai[row]; 964af674e45SBarry Smith nrow = ailen[row]; 965af674e45SBarry Smith low = 0; 96617ec6a02SBarry Smith high = nrow; 967af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 968af674e45SBarry Smith col = in[l]; 96917ec6a02SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 97017ec6a02SBarry Smith lastcol = col; 9711e3347e8SBarry Smith value = v + k*(stepval+4 + l)*4; 972af674e45SBarry Smith while (high-low > 7) { 973af674e45SBarry Smith t = (low+high)/2; 974af674e45SBarry Smith if (rp[t] > col) high = t; 975af674e45SBarry Smith else low = t; 976af674e45SBarry Smith } 977af674e45SBarry Smith for (i=low; i<high; i++) { 978af674e45SBarry Smith if (rp[i] > col) break; 979af674e45SBarry Smith if (rp[i] == col) { 980af674e45SBarry Smith bap = ap + 16*i; 981af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 982af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 983af674e45SBarry Smith bap[jj] += *value++; 984af674e45SBarry Smith } 985af674e45SBarry Smith } 986af674e45SBarry Smith goto noinsert2; 987af674e45SBarry Smith } 988af674e45SBarry Smith } 989af674e45SBarry Smith N = nrow++ - 1; 99017ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 991af674e45SBarry Smith /* shift up all the later entries in this row */ 992af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 993af674e45SBarry Smith rp[ii+1] = rp[ii]; 994a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 995af674e45SBarry Smith } 996af674e45SBarry Smith if (N >= i) { 997a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 998af674e45SBarry Smith } 999af674e45SBarry Smith rp[i] = col; 1000af674e45SBarry Smith bap = ap + 16*i; 1001af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 1002af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 1003af674e45SBarry Smith bap[jj] = *value++; 1004af674e45SBarry Smith } 1005af674e45SBarry Smith } 1006af674e45SBarry Smith noinsert2:; 1007af674e45SBarry Smith low = i; 1008af674e45SBarry Smith } 1009af674e45SBarry Smith ailen[row] = nrow; 1010af674e45SBarry Smith } 1011be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1012af674e45SBarry Smith } 10139c8c1041SBarry Smith EXTERN_C_END 1014af674e45SBarry Smith 1015af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1016af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1017af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1018af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1019af674e45SBarry Smith #endif 1020af674e45SBarry Smith 10219c8c1041SBarry Smith EXTERN_C_BEGIN 1022af674e45SBarry Smith #undef __FUNCT__ 1023af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 1024be1d678aSKris Buschelman void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1025af674e45SBarry Smith { 1026af674e45SBarry Smith Mat A = *AA; 1027af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1028c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1029c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 1030c1ac3661SBarry Smith PetscInt *aj=a->j,brow,bcol; 103117ec6a02SBarry Smith PetscInt ridx,cidx,lastcol = -1; 1032af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 1033af674e45SBarry Smith 1034af674e45SBarry Smith PetscFunctionBegin; 1035af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 1036af674e45SBarry Smith row = im[k]; brow = row/4; 1037af674e45SBarry Smith rp = aj + ai[brow]; 1038af674e45SBarry Smith ap = aa + 16*ai[brow]; 1039af674e45SBarry Smith nrow = ailen[brow]; 1040af674e45SBarry Smith low = 0; 104117ec6a02SBarry Smith high = nrow; 1042af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 1043af674e45SBarry Smith col = in[l]; bcol = col/4; 1044af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 1045af674e45SBarry Smith value = v[l + k*n]; 104617ec6a02SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 104717ec6a02SBarry Smith lastcol = col; 1048af674e45SBarry Smith while (high-low > 7) { 1049af674e45SBarry Smith t = (low+high)/2; 1050af674e45SBarry Smith if (rp[t] > bcol) high = t; 1051af674e45SBarry Smith else low = t; 1052af674e45SBarry Smith } 1053af674e45SBarry Smith for (i=low; i<high; i++) { 1054af674e45SBarry Smith if (rp[i] > bcol) break; 1055af674e45SBarry Smith if (rp[i] == bcol) { 1056af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 1057af674e45SBarry Smith *bap += value; 1058af674e45SBarry Smith goto noinsert1; 1059af674e45SBarry Smith } 1060af674e45SBarry Smith } 1061af674e45SBarry Smith N = nrow++ - 1; 106217ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1063af674e45SBarry Smith /* shift up all the later entries in this row */ 1064af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 1065af674e45SBarry Smith rp[ii+1] = rp[ii]; 1066a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1067af674e45SBarry Smith } 1068af674e45SBarry Smith if (N>=i) { 1069a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1070af674e45SBarry Smith } 1071af674e45SBarry Smith rp[i] = bcol; 1072af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 1073af674e45SBarry Smith noinsert1:; 1074af674e45SBarry Smith low = i; 1075af674e45SBarry Smith } 1076af674e45SBarry Smith ailen[brow] = nrow; 1077af674e45SBarry Smith } 1078be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1079af674e45SBarry Smith } 10809c8c1041SBarry Smith EXTERN_C_END 1081af674e45SBarry Smith 108295d5f7c2SBarry Smith /* UGLY, ugly, ugly 108387828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 10843477af42SKris Buschelman not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 108595d5f7c2SBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 108695d5f7c2SBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 108795d5f7c2SBarry Smith into the single precision data structures. 108895d5f7c2SBarry Smith */ 108995d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 1090690b6cddSBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 109195d5f7c2SBarry Smith #else 109295d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 109395d5f7c2SBarry Smith #endif 109495d5f7c2SBarry Smith 10952d61bbb3SSatish Balay #define CHUNKSIZE 10 1096de6a44a3SBarry Smith 1097be5855fcSBarry Smith /* 1098be5855fcSBarry Smith Checks for missing diagonals 1099be5855fcSBarry Smith */ 11004a2ae208SSatish Balay #undef __FUNCT__ 11014a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 11022af78befSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1103be5855fcSBarry Smith { 1104be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11056849ba73SBarry Smith PetscErrorCode ierr; 1106c1ac3661SBarry Smith PetscInt *diag,*jj = a->j,i; 1107be5855fcSBarry Smith 1108be5855fcSBarry Smith PetscFunctionBegin; 1109c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 11102af78befSBarry Smith *missing = PETSC_FALSE; 1111883fce79SBarry Smith diag = a->diag; 11120e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 1113be5855fcSBarry Smith if (jj[diag[i]] != i) { 11142af78befSBarry Smith *missing = PETSC_TRUE; 11152af78befSBarry Smith if (d) *d = i; 1116be5855fcSBarry Smith } 1117be5855fcSBarry Smith } 1118be5855fcSBarry Smith PetscFunctionReturn(0); 1119be5855fcSBarry Smith } 1120be5855fcSBarry Smith 11214a2ae208SSatish Balay #undef __FUNCT__ 11224a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1123dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1124de6a44a3SBarry Smith { 1125de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11266849ba73SBarry Smith PetscErrorCode ierr; 112709f38230SBarry Smith PetscInt i,j,m = a->mbs; 1128de6a44a3SBarry Smith 11293a40ed3dSBarry Smith PetscFunctionBegin; 113009f38230SBarry Smith if (!a->diag) { 113109f38230SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 113209f38230SBarry Smith } 11337fc0212eSBarry Smith for (i=0; i<m; i++) { 113409f38230SBarry Smith a->diag[i] = a->i[i+1]; 1135de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 1136de6a44a3SBarry Smith if (a->j[j] == i) { 113709f38230SBarry Smith a->diag[i] = j; 1138de6a44a3SBarry Smith break; 1139de6a44a3SBarry Smith } 1140de6a44a3SBarry Smith } 1141de6a44a3SBarry Smith } 11423a40ed3dSBarry Smith PetscFunctionReturn(0); 1143de6a44a3SBarry Smith } 11442593348eSBarry Smith 11452593348eSBarry Smith 1146690b6cddSBarry Smith EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 11473b2fbd54SBarry Smith 11484a2ae208SSatish Balay #undef __FUNCT__ 11494a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 11508f7157efSSatish Balay static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 11513b2fbd54SBarry Smith { 11523b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1153dfbe8321SBarry Smith PetscErrorCode ierr; 11549985e31cSBarry Smith PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 11558f7157efSSatish Balay PetscInt *tia, *tja; 11563b2fbd54SBarry Smith 11573a40ed3dSBarry Smith PetscFunctionBegin; 11583b2fbd54SBarry Smith *nn = n; 11593a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 11603b2fbd54SBarry Smith if (symmetric) { 11618f7157efSSatish Balay ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 11623b2fbd54SBarry Smith } else { 11638f7157efSSatish Balay tia = a->i; tja = a->j; 11643b2fbd54SBarry Smith } 11653b2fbd54SBarry Smith 1166ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1167ecc77c7aSBarry Smith (*nn) *= bs; 1168ecc77c7aSBarry Smith nbs = bs; 11698f7157efSSatish Balay /* malloc & create the natural set of indices */ 1170ecc77c7aSBarry Smith ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 11719985e31cSBarry Smith if (n) { 1172ecc77c7aSBarry Smith (*ia)[0] = 0; 1173ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1174ecc77c7aSBarry Smith (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1175ecc77c7aSBarry Smith } 11769985e31cSBarry Smith } 1177ecc77c7aSBarry Smith 1178ecc77c7aSBarry Smith for (i=1; i<n; i++) { 1179ecc77c7aSBarry Smith (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1180ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1181ecc77c7aSBarry Smith (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 11828f7157efSSatish Balay } 11838f7157efSSatish Balay } 11849985e31cSBarry Smith if (n) { 1185ecc77c7aSBarry Smith (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 11869985e31cSBarry Smith } 1187ecc77c7aSBarry Smith 11889985e31cSBarry Smith if (ja) { 11899985e31cSBarry Smith ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 11909985e31cSBarry Smith cnt = 0; 11919985e31cSBarry Smith for (i=0; i<n; i++) { 11929985e31cSBarry Smith for (j=0; j<bs; j++) { 11939985e31cSBarry Smith for (k=tia[i]; k<tia[i+1]; k++) { 11949985e31cSBarry Smith for (l=0; l<bs; l++) { 11959985e31cSBarry Smith (*ja)[cnt++] = bs*tja[k] + l; 11969985e31cSBarry Smith } 11979985e31cSBarry Smith } 11989985e31cSBarry Smith } 11999985e31cSBarry Smith } 12009985e31cSBarry Smith } 12019985e31cSBarry Smith 1202ecc77c7aSBarry Smith n *= bs; 1203ecc77c7aSBarry Smith nz *= bs*bs; 12048f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 12058f7157efSSatish Balay ierr = PetscFree(tia);CHKERRQ(ierr); 12068f7157efSSatish Balay ierr = PetscFree(tja);CHKERRQ(ierr); 12078f7157efSSatish Balay } 12088f7157efSSatish Balay } else { 12098f7157efSSatish Balay *ia = tia; 1210ecc77c7aSBarry Smith if (ja) *ja = tja; 12118f7157efSSatish Balay } 12128f7157efSSatish Balay if (oshift == 1) { 1213ecc77c7aSBarry Smith for (i=0; i<n+nbs; i++) (*ia)[i]++; 1214ecc77c7aSBarry Smith if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 12158f7157efSSatish Balay } 12163a40ed3dSBarry Smith PetscFunctionReturn(0); 12173b2fbd54SBarry Smith } 12183b2fbd54SBarry Smith 12194a2ae208SSatish Balay #undef __FUNCT__ 12204a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 12218f7157efSSatish Balay static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 12223b2fbd54SBarry Smith { 12233b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 12246849ba73SBarry Smith PetscErrorCode ierr; 12258f7157efSSatish Balay PetscInt i,n = a->mbs,nz = a->i[n]; 12263b2fbd54SBarry Smith 12273a40ed3dSBarry Smith PetscFunctionBegin; 12283a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 1229ecc77c7aSBarry Smith if (!blockcompressed && A->rmap.bs > 1) { 1230ecc77c7aSBarry Smith ierr = PetscFree(*ia);CHKERRQ(ierr); 12319985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 12328f7157efSSatish Balay } else if (symmetric) { 1233606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 12349985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 12358f7157efSSatish Balay } else if (oshift == 1) { /* blockcompressed */ 12363b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]--; 12379985e31cSBarry Smith if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 12383b2fbd54SBarry Smith } 12393a40ed3dSBarry Smith PetscFunctionReturn(0); 12403b2fbd54SBarry Smith } 12413b2fbd54SBarry Smith 12424a2ae208SSatish Balay #undef __FUNCT__ 12434a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 1244dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 12452d61bbb3SSatish Balay { 12462d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1247dfbe8321SBarry Smith PetscErrorCode ierr; 12482d61bbb3SSatish Balay 1249433994e6SBarry Smith PetscFunctionBegin; 1250aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1251899cda47SBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 12522d61bbb3SSatish Balay #endif 1253e6b907acSBarry Smith ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1254c38d4ed2SBarry Smith if (a->row) { 1255c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 1256c38d4ed2SBarry Smith } 1257c38d4ed2SBarry Smith if (a->col) { 1258c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 1259c38d4ed2SBarry Smith } 126005b42c5fSBarry Smith ierr = PetscFree(a->diag);CHKERRQ(ierr); 126105b42c5fSBarry Smith ierr = PetscFree(a->idiag);CHKERRQ(ierr); 126205b42c5fSBarry Smith ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 126305b42c5fSBarry Smith ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 126405b42c5fSBarry Smith ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1265e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126605b42c5fSBarry Smith ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1267563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 126805b42c5fSBarry Smith ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 1269563b5814SBarry Smith #endif 127005b42c5fSBarry Smith ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 127173e7a558SHong Zhang if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1272c4319e64SHong Zhang 1273606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 1274901853e0SKris Buschelman 1275dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 127643516a2dSKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1277901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1280901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1281901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1282901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 12832d61bbb3SSatish Balay PetscFunctionReturn(0); 12842d61bbb3SSatish Balay } 12852d61bbb3SSatish Balay 12864a2ae208SSatish Balay #undef __FUNCT__ 12874a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 12884e0d8c25SBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 12892d61bbb3SSatish Balay { 12902d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 129163ba0a88SBarry Smith PetscErrorCode ierr; 12922d61bbb3SSatish Balay 12932d61bbb3SSatish Balay PetscFunctionBegin; 1294aa275fccSKris Buschelman switch (op) { 1295aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 12964e0d8c25SBarry Smith a->roworiented = flg; 1297aa275fccSKris Buschelman break; 1298aa275fccSKris Buschelman case MAT_KEEP_ZEROED_ROWS: 12994e0d8c25SBarry Smith a->keepzeroedrows = flg; 1300aa275fccSKris Buschelman break; 1301512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1302512a5fc5SBarry Smith a->nonew = (flg ? 0 : 1); 1303aa275fccSKris Buschelman break; 1304aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 13054e0d8c25SBarry Smith a->nonew = (flg ? -1 : 0); 1306aa275fccSKris Buschelman break; 1307aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 13084e0d8c25SBarry Smith a->nonew = (flg ? -2 : 0); 1309aa275fccSKris Buschelman break; 13104e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1311aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1312aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 1313290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1314aa275fccSKris Buschelman break; 131577e54ba9SKris Buschelman case MAT_SYMMETRIC: 131677e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 13179a4540c5SBarry Smith case MAT_HERMITIAN: 13189a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1319290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 132077e54ba9SKris Buschelman break; 1321aa275fccSKris Buschelman default: 1322ad86a440SBarry Smith SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 13232d61bbb3SSatish Balay } 13242d61bbb3SSatish Balay PetscFunctionReturn(0); 13252d61bbb3SSatish Balay } 13262d61bbb3SSatish Balay 13274a2ae208SSatish Balay #undef __FUNCT__ 13284a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 1329c1ac3661SBarry Smith PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 13302d61bbb3SSatish Balay { 13312d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13326849ba73SBarry Smith PetscErrorCode ierr; 1333c1ac3661SBarry Smith PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 13343f1db9ecSBarry Smith MatScalar *aa,*aa_i; 133587828ca2SBarry Smith PetscScalar *v_i; 13362d61bbb3SSatish Balay 13372d61bbb3SSatish Balay PetscFunctionBegin; 1338899cda47SBarry Smith bs = A->rmap.bs; 13392d61bbb3SSatish Balay ai = a->i; 13402d61bbb3SSatish Balay aj = a->j; 13412d61bbb3SSatish Balay aa = a->a; 13422d61bbb3SSatish Balay bs2 = a->bs2; 13432d61bbb3SSatish Balay 1344899cda47SBarry Smith if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 13452d61bbb3SSatish Balay 13462d61bbb3SSatish Balay bn = row/bs; /* Block number */ 13472d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 13482d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 13492d61bbb3SSatish Balay *nz = bs*M; 13502d61bbb3SSatish Balay 13512d61bbb3SSatish Balay if (v) { 13522d61bbb3SSatish Balay *v = 0; 13532d61bbb3SSatish Balay if (*nz) { 135487828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 13552d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13562d61bbb3SSatish Balay v_i = *v + i*bs; 13572d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 13582d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 13592d61bbb3SSatish Balay } 13602d61bbb3SSatish Balay } 13612d61bbb3SSatish Balay } 13622d61bbb3SSatish Balay 13632d61bbb3SSatish Balay if (idx) { 13642d61bbb3SSatish Balay *idx = 0; 13652d61bbb3SSatish Balay if (*nz) { 1366c1ac3661SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 13672d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13682d61bbb3SSatish Balay idx_i = *idx + i*bs; 13692d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 13702d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 13712d61bbb3SSatish Balay } 13722d61bbb3SSatish Balay } 13732d61bbb3SSatish Balay } 13742d61bbb3SSatish Balay PetscFunctionReturn(0); 13752d61bbb3SSatish Balay } 13762d61bbb3SSatish Balay 13774a2ae208SSatish Balay #undef __FUNCT__ 13784a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1379c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 13802d61bbb3SSatish Balay { 1381dfbe8321SBarry Smith PetscErrorCode ierr; 1382606d414cSSatish Balay 13832d61bbb3SSatish Balay PetscFunctionBegin; 138405b42c5fSBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 138505b42c5fSBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 13862d61bbb3SSatish Balay PetscFunctionReturn(0); 13872d61bbb3SSatish Balay } 13882d61bbb3SSatish Balay 13894a2ae208SSatish Balay #undef __FUNCT__ 13904a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 1391dfbe8321SBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 13922d61bbb3SSatish Balay { 13932d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 13942d61bbb3SSatish Balay Mat C; 13956849ba73SBarry Smith PetscErrorCode ierr; 1396899cda47SBarry Smith PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1397c1ac3661SBarry Smith PetscInt *rows,*cols,bs2=a->bs2; 139887828ca2SBarry Smith PetscScalar *array; 13992d61bbb3SSatish Balay 14002d61bbb3SSatish Balay PetscFunctionBegin; 140129bbc08cSBarry Smith if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1402c1ac3661SBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1403c1ac3661SBarry Smith ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 14042d61bbb3SSatish Balay 1405375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 140687828ca2SBarry Smith ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 140787828ca2SBarry Smith for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1408375fe846SBarry Smith #else 14093eda8832SBarry Smith array = a->a; 1410375fe846SBarry Smith #endif 1411375fe846SBarry Smith 14122d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 14137adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1414899cda47SBarry Smith ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 14157adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1416ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1417606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 1418c1ac3661SBarry Smith ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 14192d61bbb3SSatish Balay cols = rows + bs; 14202d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 14212d61bbb3SSatish Balay cols[0] = i*bs; 14222d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 14232d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 14242d61bbb3SSatish Balay for (j=0; j<len; j++) { 14252d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 14262d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 14272d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 14282d61bbb3SSatish Balay array += bs2; 14292d61bbb3SSatish Balay } 14302d61bbb3SSatish Balay } 1431606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 1432375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 1433375fe846SBarry Smith ierr = PetscFree(array); 1434375fe846SBarry Smith #endif 14352d61bbb3SSatish Balay 14362d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14372d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14382d61bbb3SSatish Balay 1439c4992f7dSBarry Smith if (B) { 14402d61bbb3SSatish Balay *B = C; 14412d61bbb3SSatish Balay } else { 1442273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 14432d61bbb3SSatish Balay } 14442d61bbb3SSatish Balay PetscFunctionReturn(0); 14452d61bbb3SSatish Balay } 14462d61bbb3SSatish Balay 14474a2ae208SSatish Balay #undef __FUNCT__ 14484a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 14496849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 14502593348eSBarry Smith { 1451b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 14526849ba73SBarry Smith PetscErrorCode ierr; 1453899cda47SBarry Smith PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1454b24ad042SBarry Smith int fd; 145587828ca2SBarry Smith PetscScalar *aa; 1456ce6f0cecSBarry Smith FILE *file; 14572593348eSBarry Smith 14583a40ed3dSBarry Smith PetscFunctionBegin; 1459b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1460899cda47SBarry Smith ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1461552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 14623b2fbd54SBarry Smith 1463899cda47SBarry Smith col_lens[1] = A->rmap.N; 1464899cda47SBarry Smith col_lens[2] = A->cmap.n; 14657e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 14662593348eSBarry Smith 14672593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 1468b6490206SBarry Smith count = 0; 1469b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1470b6490206SBarry Smith for (j=0; j<bs; j++) { 1471b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1472b6490206SBarry Smith } 14732593348eSBarry Smith } 1474899cda47SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1475606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 14762593348eSBarry Smith 14772593348eSBarry Smith /* store column indices (zero start index) */ 1478c1ac3661SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1479b6490206SBarry Smith count = 0; 1480b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1481b6490206SBarry Smith for (j=0; j<bs; j++) { 1482b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1483b6490206SBarry Smith for (l=0; l<bs; l++) { 1484b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 14852593348eSBarry Smith } 14862593348eSBarry Smith } 1487b6490206SBarry Smith } 1488b6490206SBarry Smith } 14896f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1490606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 14912593348eSBarry Smith 14922593348eSBarry Smith /* store nonzero values */ 149387828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1494b6490206SBarry Smith count = 0; 1495b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1496b6490206SBarry Smith for (j=0; j<bs; j++) { 1497b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1498b6490206SBarry Smith for (l=0; l<bs; l++) { 14997e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 1500b6490206SBarry Smith } 1501b6490206SBarry Smith } 1502b6490206SBarry Smith } 1503b6490206SBarry Smith } 15046f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1505606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1506ce6f0cecSBarry Smith 1507b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1508ce6f0cecSBarry Smith if (file) { 1509899cda47SBarry Smith fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1510ce6f0cecSBarry Smith } 15113a40ed3dSBarry Smith PetscFunctionReturn(0); 15122593348eSBarry Smith } 15132593348eSBarry Smith 15144a2ae208SSatish Balay #undef __FUNCT__ 15154a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 15166849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 15172593348eSBarry Smith { 1518b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1519dfbe8321SBarry Smith PetscErrorCode ierr; 1520899cda47SBarry Smith PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1521f3ef73ceSBarry Smith PetscViewerFormat format; 15222593348eSBarry Smith 15233a40ed3dSBarry Smith PetscFunctionBegin; 1524b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1525456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 152677431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1527fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1528bcd9e38bSBarry Smith Mat aij; 1529ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1530bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 1531bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 153204929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 153304929863SHong Zhang PetscFunctionReturn(0); 1534fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1535b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 153644cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 153744cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 153877431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 153944cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 154044cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 1541aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15420e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1543a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 15440e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15450e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1546a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 15470e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15480e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1549a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15500ef38995SBarry Smith } 155144cd7ae7SLois Curfman McInnes #else 15520ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 1553a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 15540ef38995SBarry Smith } 155544cd7ae7SLois Curfman McInnes #endif 155644cd7ae7SLois Curfman McInnes } 155744cd7ae7SLois Curfman McInnes } 1558b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 155944cd7ae7SLois Curfman McInnes } 156044cd7ae7SLois Curfman McInnes } 1561b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 15620ef38995SBarry Smith } else { 1563b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1564b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1565b6490206SBarry Smith for (j=0; j<bs; j++) { 156677431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1567b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1568b6490206SBarry Smith for (l=0; l<bs; l++) { 1569aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15700e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1571a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 15720e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15730e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1574a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 15750e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15760ef38995SBarry Smith } else { 1577a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 157888685aaeSLois Curfman McInnes } 157988685aaeSLois Curfman McInnes #else 1580a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 158188685aaeSLois Curfman McInnes #endif 15822593348eSBarry Smith } 15832593348eSBarry Smith } 1584b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 15852593348eSBarry Smith } 15862593348eSBarry Smith } 1587b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1588b6490206SBarry Smith } 1589b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 15903a40ed3dSBarry Smith PetscFunctionReturn(0); 15912593348eSBarry Smith } 15922593348eSBarry Smith 15934a2ae208SSatish Balay #undef __FUNCT__ 15944a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 15956849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 15963270192aSSatish Balay { 159777ed5343SBarry Smith Mat A = (Mat) Aa; 15983270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 15996849ba73SBarry Smith PetscErrorCode ierr; 1600899cda47SBarry Smith PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 16010e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 16023f1db9ecSBarry Smith MatScalar *aa; 1603b0a32e0cSBarry Smith PetscViewer viewer; 16043270192aSSatish Balay 16053a40ed3dSBarry Smith PetscFunctionBegin; 16063270192aSSatish Balay 1607b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 160877ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 160977ed5343SBarry Smith 1610b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 161177ed5343SBarry Smith 16123270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1613b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 16143270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16153270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1616899cda47SBarry Smith y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 16173270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16183270192aSSatish Balay aa = a->a + j*bs2; 16193270192aSSatish Balay for (k=0; k<bs; k++) { 16203270192aSSatish Balay for (l=0; l<bs; l++) { 16210e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 1622b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16233270192aSSatish Balay } 16243270192aSSatish Balay } 16253270192aSSatish Balay } 16263270192aSSatish Balay } 1627b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 16283270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16293270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1630899cda47SBarry Smith y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 16313270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16323270192aSSatish Balay aa = a->a + j*bs2; 16333270192aSSatish Balay for (k=0; k<bs; k++) { 16343270192aSSatish Balay for (l=0; l<bs; l++) { 16350e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 1636b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16373270192aSSatish Balay } 16383270192aSSatish Balay } 16393270192aSSatish Balay } 16403270192aSSatish Balay } 16413270192aSSatish Balay 1642b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 16433270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16443270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1645899cda47SBarry Smith y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 16463270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16473270192aSSatish Balay aa = a->a + j*bs2; 16483270192aSSatish Balay for (k=0; k<bs; k++) { 16493270192aSSatish Balay for (l=0; l<bs; l++) { 16500e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 1651b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16523270192aSSatish Balay } 16533270192aSSatish Balay } 16543270192aSSatish Balay } 16553270192aSSatish Balay } 165677ed5343SBarry Smith PetscFunctionReturn(0); 165777ed5343SBarry Smith } 16583270192aSSatish Balay 16594a2ae208SSatish Balay #undef __FUNCT__ 16604a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 16616849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 166277ed5343SBarry Smith { 1663dfbe8321SBarry Smith PetscErrorCode ierr; 16640e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 1665b0a32e0cSBarry Smith PetscDraw draw; 166677ed5343SBarry Smith PetscTruth isnull; 16673270192aSSatish Balay 166877ed5343SBarry Smith PetscFunctionBegin; 166977ed5343SBarry Smith 1670b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1671b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 167277ed5343SBarry Smith 167377ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1674899cda47SBarry Smith xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 167577ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 1676b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1677b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 167877ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 16793a40ed3dSBarry Smith PetscFunctionReturn(0); 16803270192aSSatish Balay } 16813270192aSSatish Balay 16824a2ae208SSatish Balay #undef __FUNCT__ 16834a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 1684dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 16852593348eSBarry Smith { 1686dfbe8321SBarry Smith PetscErrorCode ierr; 168732077d6dSBarry Smith PetscTruth iascii,isbinary,isdraw; 16882593348eSBarry Smith 16893a40ed3dSBarry Smith PetscFunctionBegin; 169032077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1691fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1692fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 169332077d6dSBarry Smith if (iascii){ 16943a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 16950f5bd95cSBarry Smith } else if (isbinary) { 16963a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 16970f5bd95cSBarry Smith } else if (isdraw) { 16983a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 16995cd90555SBarry Smith } else { 1700a5e6ed63SBarry Smith Mat B; 1701ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1702a5e6ed63SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1703a5e6ed63SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 17042593348eSBarry Smith } 17053a40ed3dSBarry Smith PetscFunctionReturn(0); 17062593348eSBarry Smith } 1707b6490206SBarry Smith 1708cd0e1443SSatish Balay 17094a2ae208SSatish Balay #undef __FUNCT__ 17104a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 1711c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1712cd0e1443SSatish Balay { 1713cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1714c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1715c1ac3661SBarry Smith PetscInt *ai = a->i,*ailen = a->ilen; 1716899cda47SBarry Smith PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 171797e567efSBarry Smith MatScalar *ap,*aa = a->a; 1718cd0e1443SSatish Balay 17193a40ed3dSBarry Smith PetscFunctionBegin; 17202d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 1721cd0e1443SSatish Balay row = im[k]; brow = row/bs; 172297e567efSBarry Smith if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1723899cda47SBarry Smith if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 17242d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 17252c3acbe9SBarry Smith nrow = ailen[brow]; 17262d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 172797e567efSBarry Smith if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1728899cda47SBarry Smith if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 17292d61bbb3SSatish Balay col = in[l] ; 17302d61bbb3SSatish Balay bcol = col/bs; 17312d61bbb3SSatish Balay cidx = col%bs; 17322d61bbb3SSatish Balay ridx = row%bs; 17332d61bbb3SSatish Balay high = nrow; 17342d61bbb3SSatish Balay low = 0; /* assume unsorted */ 17352d61bbb3SSatish Balay while (high-low > 5) { 1736cd0e1443SSatish Balay t = (low+high)/2; 1737cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 1738cd0e1443SSatish Balay else low = t; 1739cd0e1443SSatish Balay } 1740cd0e1443SSatish Balay for (i=low; i<high; i++) { 1741cd0e1443SSatish Balay if (rp[i] > bcol) break; 1742cd0e1443SSatish Balay if (rp[i] == bcol) { 17432d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 17442d61bbb3SSatish Balay goto finished; 1745cd0e1443SSatish Balay } 1746cd0e1443SSatish Balay } 174797e567efSBarry Smith *v++ = 0.0; 17482d61bbb3SSatish Balay finished:; 1749cd0e1443SSatish Balay } 1750cd0e1443SSatish Balay } 17513a40ed3dSBarry Smith PetscFunctionReturn(0); 1752cd0e1443SSatish Balay } 1753cd0e1443SSatish Balay 175495d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 17554a2ae208SSatish Balay #undef __FUNCT__ 17564a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1757c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 175895d5f7c2SBarry Smith { 1759563b5814SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1760dfbe8321SBarry Smith PetscErrorCode ierr; 1761c1ac3661SBarry Smith PetscInt i,N = m*n*b->bs2; 1762563b5814SBarry Smith MatScalar *vsingle; 176395d5f7c2SBarry Smith 176495d5f7c2SBarry Smith PetscFunctionBegin; 1765563b5814SBarry Smith if (N > b->setvalueslen) { 176605b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1767b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1768563b5814SBarry Smith b->setvalueslen = N; 1769563b5814SBarry Smith } 1770563b5814SBarry Smith vsingle = b->setvaluescopy; 177195d5f7c2SBarry Smith for (i=0; i<N; i++) { 177295d5f7c2SBarry Smith vsingle[i] = v[i]; 177395d5f7c2SBarry Smith } 177495d5f7c2SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 177595d5f7c2SBarry Smith PetscFunctionReturn(0); 177695d5f7c2SBarry Smith } 177795d5f7c2SBarry Smith #endif 177895d5f7c2SBarry Smith 17792d61bbb3SSatish Balay 17804a2ae208SSatish Balay #undef __FUNCT__ 17814a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1782c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 178392c4ed94SBarry Smith { 178492c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1785e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1786c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 17876849ba73SBarry Smith PetscErrorCode ierr; 1788899cda47SBarry Smith PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1789273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 1790f15d580aSBarry Smith const MatScalar *value = v; 1791f15d580aSBarry Smith MatScalar *ap,*aa = a->a,*bap; 179292c4ed94SBarry Smith 17933a40ed3dSBarry Smith PetscFunctionBegin; 17940e324ae4SSatish Balay if (roworiented) { 17950e324ae4SSatish Balay stepval = (n-1)*bs; 17960e324ae4SSatish Balay } else { 17970e324ae4SSatish Balay stepval = (m-1)*bs; 17980e324ae4SSatish Balay } 179992c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 180092c4ed94SBarry Smith row = im[k]; 18015ef9f2a5SBarry Smith if (row < 0) continue; 18022515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 180377431f27SBarry Smith if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 180492c4ed94SBarry Smith #endif 180592c4ed94SBarry Smith rp = aj + ai[row]; 180692c4ed94SBarry Smith ap = aa + bs2*ai[row]; 180792c4ed94SBarry Smith rmax = imax[row]; 180892c4ed94SBarry Smith nrow = ailen[row]; 180992c4ed94SBarry Smith low = 0; 1810c71e6ed7SBarry Smith high = nrow; 181192c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 18125ef9f2a5SBarry Smith if (in[l] < 0) continue; 18132515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 181477431f27SBarry Smith if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 181592c4ed94SBarry Smith #endif 181692c4ed94SBarry Smith col = in[l]; 181792c4ed94SBarry Smith if (roworiented) { 18180e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 18190e324ae4SSatish Balay } else { 18200e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 182192c4ed94SBarry Smith } 18227cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 1823e2ee6c50SBarry Smith lastcol = col; 182492c4ed94SBarry Smith while (high-low > 7) { 182592c4ed94SBarry Smith t = (low+high)/2; 182692c4ed94SBarry Smith if (rp[t] > col) high = t; 182792c4ed94SBarry Smith else low = t; 182892c4ed94SBarry Smith } 182992c4ed94SBarry Smith for (i=low; i<high; i++) { 183092c4ed94SBarry Smith if (rp[i] > col) break; 183192c4ed94SBarry Smith if (rp[i] == col) { 18328a84c255SSatish Balay bap = ap + bs2*i; 18330e324ae4SSatish Balay if (roworiented) { 18348a84c255SSatish Balay if (is == ADD_VALUES) { 1835dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1836dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18378a84c255SSatish Balay bap[jj] += *value++; 1838dd9472c6SBarry Smith } 1839dd9472c6SBarry Smith } 18400e324ae4SSatish Balay } else { 1841dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1842dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18430e324ae4SSatish Balay bap[jj] = *value++; 18448a84c255SSatish Balay } 1845dd9472c6SBarry Smith } 1846dd9472c6SBarry Smith } 18470e324ae4SSatish Balay } else { 18480e324ae4SSatish Balay if (is == ADD_VALUES) { 1849dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1850dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 18510e324ae4SSatish Balay *bap++ += *value++; 1852dd9472c6SBarry Smith } 1853dd9472c6SBarry Smith } 18540e324ae4SSatish Balay } else { 1855dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1856dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 18570e324ae4SSatish Balay *bap++ = *value++; 18580e324ae4SSatish Balay } 18598a84c255SSatish Balay } 1860dd9472c6SBarry Smith } 1861dd9472c6SBarry Smith } 1862f1241b54SBarry Smith goto noinsert2; 186392c4ed94SBarry Smith } 186492c4ed94SBarry Smith } 186589280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 1866085a36d4SBarry Smith if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1867421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1868c03d1d03SSatish Balay N = nrow++ - 1; high++; 186992c4ed94SBarry Smith /* shift up all the later entries in this row */ 187092c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 187192c4ed94SBarry Smith rp[ii+1] = rp[ii]; 1872549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 187392c4ed94SBarry Smith } 1874549d3d68SSatish Balay if (N >= i) { 1875549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1876549d3d68SSatish Balay } 187792c4ed94SBarry Smith rp[i] = col; 18788a84c255SSatish Balay bap = ap + bs2*i; 18790e324ae4SSatish Balay if (roworiented) { 1880dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1881dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18820e324ae4SSatish Balay bap[jj] = *value++; 1883dd9472c6SBarry Smith } 1884dd9472c6SBarry Smith } 18850e324ae4SSatish Balay } else { 1886dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1887dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 18880e324ae4SSatish Balay *bap++ = *value++; 18890e324ae4SSatish Balay } 1890dd9472c6SBarry Smith } 1891dd9472c6SBarry Smith } 1892f1241b54SBarry Smith noinsert2:; 189392c4ed94SBarry Smith low = i; 189492c4ed94SBarry Smith } 189592c4ed94SBarry Smith ailen[row] = nrow; 189692c4ed94SBarry Smith } 18973a40ed3dSBarry Smith PetscFunctionReturn(0); 189892c4ed94SBarry Smith } 189926e093fcSHong Zhang 19004a2ae208SSatish Balay #undef __FUNCT__ 19014a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1902dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1903584200bdSSatish Balay { 1904584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1905c1ac3661SBarry Smith PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1906899cda47SBarry Smith PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 19076849ba73SBarry Smith PetscErrorCode ierr; 1908c1ac3661SBarry Smith PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 19093f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 19103447b6efSHong Zhang PetscReal ratio=0.6; 1911584200bdSSatish Balay 19123a40ed3dSBarry Smith PetscFunctionBegin; 19133a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1914584200bdSSatish Balay 191543ee02c3SBarry Smith if (m) rmax = ailen[0]; 1916584200bdSSatish Balay for (i=1; i<mbs; i++) { 1917584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1918584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1919d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1920584200bdSSatish Balay if (fshift) { 1921a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1922584200bdSSatish Balay N = ailen[i]; 1923584200bdSSatish Balay for (j=0; j<N; j++) { 1924584200bdSSatish Balay ip[j-fshift] = ip[j]; 1925549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1926584200bdSSatish Balay } 1927584200bdSSatish Balay } 1928584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1929584200bdSSatish Balay } 1930584200bdSSatish Balay if (mbs) { 1931584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1932584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1933584200bdSSatish Balay } 1934584200bdSSatish Balay /* reset ilen and imax for each row */ 1935584200bdSSatish Balay for (i=0; i<mbs; i++) { 1936584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1937584200bdSSatish Balay } 1938a7c10996SSatish Balay a->nz = ai[mbs]; 1939584200bdSSatish Balay 1940584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1941b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 1942584200bdSSatish Balay if (fshift && a->diag) { 1943606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 194452e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1945584200bdSSatish Balay a->diag = 0; 1946584200bdSSatish Balay } 1947899cda47SBarry Smith ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1948ae15b995SBarry Smith ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1949ae15b995SBarry Smith ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1950e2f3b5e9SSatish Balay a->reallocs = 0; 19510e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1952cf4441caSHong Zhang 1953cb5d8e9eSHong Zhang /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 19542f53aa61SHong Zhang if (a->compressedrow.use){ 1955317fbc4cSHong Zhang ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 195673e7a558SHong Zhang } 195788e51ccdSHong Zhang 195888e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 19593a40ed3dSBarry Smith PetscFunctionReturn(0); 1960584200bdSSatish Balay } 1961584200bdSSatish Balay 1962bea157c4SSatish Balay /* 1963bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1964bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1965bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1966bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1967bea157c4SSatish Balay */ 19684a2ae208SSatish Balay #undef __FUNCT__ 19694a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1970c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1971d9b7c43dSSatish Balay { 1972c1ac3661SBarry Smith PetscInt i,j,k,row; 1973bea157c4SSatish Balay PetscTruth flg; 19743a40ed3dSBarry Smith 1975433994e6SBarry Smith PetscFunctionBegin; 1976bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1977bea157c4SSatish Balay row = idx[i]; 1978bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1979bea157c4SSatish Balay sizes[j] = 1; 1980bea157c4SSatish Balay i++; 1981e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1982bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1983bea157c4SSatish Balay i++; 1984bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1985bea157c4SSatish Balay flg = PETSC_TRUE; 1986bea157c4SSatish Balay for (k=1; k<bs; k++) { 1987bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1988bea157c4SSatish Balay flg = PETSC_FALSE; 1989bea157c4SSatish Balay break; 1990d9b7c43dSSatish Balay } 1991bea157c4SSatish Balay } 1992abc0a331SBarry Smith if (flg) { /* No break in the bs */ 1993bea157c4SSatish Balay sizes[j] = bs; 1994bea157c4SSatish Balay i+= bs; 1995bea157c4SSatish Balay } else { 1996bea157c4SSatish Balay sizes[j] = 1; 1997bea157c4SSatish Balay i++; 1998bea157c4SSatish Balay } 1999bea157c4SSatish Balay } 2000bea157c4SSatish Balay } 2001bea157c4SSatish Balay *bs_max = j; 20023a40ed3dSBarry Smith PetscFunctionReturn(0); 2003d9b7c43dSSatish Balay } 2004d9b7c43dSSatish Balay 20054a2ae208SSatish Balay #undef __FUNCT__ 20064a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2007f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2008d9b7c43dSSatish Balay { 2009d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2010dfbe8321SBarry Smith PetscErrorCode ierr; 2011f4df32b1SMatthew Knepley PetscInt i,j,k,count,*rows; 2012899cda47SBarry Smith PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 201387828ca2SBarry Smith PetscScalar zero = 0.0; 20143f1db9ecSBarry Smith MatScalar *aa; 2015d9b7c43dSSatish Balay 20163a40ed3dSBarry Smith PetscFunctionBegin; 2017d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2018bea157c4SSatish Balay /* allocate memory for rows,sizes */ 2019c1ac3661SBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2020bea157c4SSatish Balay sizes = rows + is_n; 2021bea157c4SSatish Balay 2022563b5814SBarry Smith /* copy IS values to rows, and sort them */ 2023bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2024bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2025dffd3267SBarry Smith if (baij->keepzeroedrows) { 2026dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 2027dffd3267SBarry Smith bs_max = is_n; 202888e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 2029dffd3267SBarry Smith } else { 2030bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 203188e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 2032dffd3267SBarry Smith } 2033bea157c4SSatish Balay 2034bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2035bea157c4SSatish Balay row = rows[j]; 2036899cda47SBarry Smith if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2037bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2038b31fbe3bSSatish Balay aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2039dffd3267SBarry Smith if (sizes[i] == bs && !baij->keepzeroedrows) { 2040f4df32b1SMatthew Knepley if (diag != 0.0) { 2041bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 2042bea157c4SSatish Balay baij->ilen[row/bs] = 1; 2043bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 2044bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2045a07cd24cSSatish Balay } 2046563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 2047bea157c4SSatish Balay for (k=0; k<bs; k++) { 2048f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2049bea157c4SSatish Balay } 2050f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2051bea157c4SSatish Balay baij->ilen[row/bs] = 0; 2052f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2053bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 2054aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 2055634064b4SBarry Smith if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2056bea157c4SSatish Balay #endif 2057bea157c4SSatish Balay for (k=0; k<count; k++) { 2058d9b7c43dSSatish Balay aa[0] = zero; 2059d9b7c43dSSatish Balay aa += bs; 2060d9b7c43dSSatish Balay } 2061f4df32b1SMatthew Knepley if (diag != 0.0) { 2062f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2063d9b7c43dSSatish Balay } 2064d9b7c43dSSatish Balay } 2065bea157c4SSatish Balay } 2066bea157c4SSatish Balay 2067606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 20689a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 20693a40ed3dSBarry Smith PetscFunctionReturn(0); 2070d9b7c43dSSatish Balay } 20711c351548SSatish Balay 20724a2ae208SSatish Balay #undef __FUNCT__ 20734a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 2074c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 20752d61bbb3SSatish Balay { 20762d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2077e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2078c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2079899cda47SBarry Smith PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 20806849ba73SBarry Smith PetscErrorCode ierr; 2081c1ac3661SBarry Smith PetscInt ridx,cidx,bs2=a->bs2; 2082273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 20833f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 20842d61bbb3SSatish Balay 20852d61bbb3SSatish Balay PetscFunctionBegin; 20862d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 2087085a36d4SBarry Smith row = im[k]; 2088085a36d4SBarry Smith brow = row/bs; 20895ef9f2a5SBarry Smith if (row < 0) continue; 20902515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 2091899cda47SBarry Smith if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 20922d61bbb3SSatish Balay #endif 20932d61bbb3SSatish Balay rp = aj + ai[brow]; 20942d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 20952d61bbb3SSatish Balay rmax = imax[brow]; 20962d61bbb3SSatish Balay nrow = ailen[brow]; 20972d61bbb3SSatish Balay low = 0; 2098c71e6ed7SBarry Smith high = nrow; 20992d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 21005ef9f2a5SBarry Smith if (in[l] < 0) continue; 21012515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 2102899cda47SBarry Smith if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 21032d61bbb3SSatish Balay #endif 21042d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 21052d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 21062d61bbb3SSatish Balay if (roworiented) { 21075ef9f2a5SBarry Smith value = v[l + k*n]; 21082d61bbb3SSatish Balay } else { 21092d61bbb3SSatish Balay value = v[k + l*m]; 21102d61bbb3SSatish Balay } 21117cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 2112e2ee6c50SBarry Smith lastcol = col; 21132d61bbb3SSatish Balay while (high-low > 7) { 21142d61bbb3SSatish Balay t = (low+high)/2; 21152d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 21162d61bbb3SSatish Balay else low = t; 21172d61bbb3SSatish Balay } 21182d61bbb3SSatish Balay for (i=low; i<high; i++) { 21192d61bbb3SSatish Balay if (rp[i] > bcol) break; 21202d61bbb3SSatish Balay if (rp[i] == bcol) { 21212d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 21222d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 21232d61bbb3SSatish Balay else *bap = value; 21242d61bbb3SSatish Balay goto noinsert1; 21252d61bbb3SSatish Balay } 21262d61bbb3SSatish Balay } 21272d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 2128085a36d4SBarry Smith if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2129421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2130c03d1d03SSatish Balay N = nrow++ - 1; high++; 21312d61bbb3SSatish Balay /* shift up all the later entries in this row */ 21322d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 21332d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 2134549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 21352d61bbb3SSatish Balay } 2136549d3d68SSatish Balay if (N>=i) { 2137549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2138549d3d68SSatish Balay } 21392d61bbb3SSatish Balay rp[i] = bcol; 21402d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 2141085a36d4SBarry Smith a->nz++; 21422d61bbb3SSatish Balay noinsert1:; 21432d61bbb3SSatish Balay low = i; 21442d61bbb3SSatish Balay } 21452d61bbb3SSatish Balay ailen[brow] = nrow; 21462d61bbb3SSatish Balay } 214788e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 21482d61bbb3SSatish Balay PetscFunctionReturn(0); 21492d61bbb3SSatish Balay } 21502d61bbb3SSatish Balay 21512d61bbb3SSatish Balay 21524a2ae208SSatish Balay #undef __FUNCT__ 21534a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2154dfbe8321SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 21552d61bbb3SSatish Balay { 21562d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 21572d61bbb3SSatish Balay Mat outA; 2158dfbe8321SBarry Smith PetscErrorCode ierr; 2159667159a5SBarry Smith PetscTruth row_identity,col_identity; 21602d61bbb3SSatish Balay 21612d61bbb3SSatish Balay PetscFunctionBegin; 2162d3d32019SBarry Smith if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2163667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2164667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2165667159a5SBarry Smith if (!row_identity || !col_identity) { 2166634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2167667159a5SBarry Smith } 21682d61bbb3SSatish Balay 21692d61bbb3SSatish Balay outA = inA; 21702d61bbb3SSatish Balay inA->factor = FACTOR_LU; 21712d61bbb3SSatish Balay 2172c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2173cf242676SKris Buschelman 2174c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2175c3122656SLisandro Dalcin if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2176c3122656SLisandro Dalcin a->row = row; 2177c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2178c3122656SLisandro Dalcin if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2179c3122656SLisandro Dalcin a->col = col; 2180c38d4ed2SBarry Smith 2181c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 21824c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 218352e6d16bSBarry Smith ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2184c38d4ed2SBarry Smith 2185cf242676SKris Buschelman /* 2186cf242676SKris Buschelman Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 2187cf242676SKris Buschelman for ILU(0) factorization with natural ordering 2188cf242676SKris Buschelman */ 2189899cda47SBarry Smith if (inA->rmap.bs < 8) { 2190cf242676SKris Buschelman ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 2191cf242676SKris Buschelman } else { 2192c38d4ed2SBarry Smith if (!a->solve_work) { 2193899cda47SBarry Smith ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2194899cda47SBarry Smith ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2195c38d4ed2SBarry Smith } 21962d61bbb3SSatish Balay } 21972d61bbb3SSatish Balay 2198af281ebdSHong Zhang ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 2199667159a5SBarry Smith 22002d61bbb3SSatish Balay PetscFunctionReturn(0); 22012d61bbb3SSatish Balay } 2202d9b7c43dSSatish Balay 2203fb2e594dSBarry Smith EXTERN_C_BEGIN 22044a2ae208SSatish Balay #undef __FUNCT__ 22054a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2206be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 220727a8da17SBarry Smith { 220827a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2209c1ac3661SBarry Smith PetscInt i,nz,nbs; 221027a8da17SBarry Smith 221127a8da17SBarry Smith PetscFunctionBegin; 221214db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 221314db4f74SSatish Balay nbs = baij->nbs; 221427a8da17SBarry Smith for (i=0; i<nz; i++) { 221527a8da17SBarry Smith baij->j[i] = indices[i]; 221627a8da17SBarry Smith } 221727a8da17SBarry Smith baij->nz = nz; 221814db4f74SSatish Balay for (i=0; i<nbs; i++) { 221927a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 222027a8da17SBarry Smith } 222127a8da17SBarry Smith 222227a8da17SBarry Smith PetscFunctionReturn(0); 222327a8da17SBarry Smith } 2224fb2e594dSBarry Smith EXTERN_C_END 222527a8da17SBarry Smith 22264a2ae208SSatish Balay #undef __FUNCT__ 22274a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 222827a8da17SBarry Smith /*@ 222927a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 223027a8da17SBarry Smith in the matrix. 223127a8da17SBarry Smith 223227a8da17SBarry Smith Input Parameters: 223327a8da17SBarry Smith + mat - the SeqBAIJ matrix 223427a8da17SBarry Smith - indices - the column indices 223527a8da17SBarry Smith 223615091d37SBarry Smith Level: advanced 223715091d37SBarry Smith 223827a8da17SBarry Smith Notes: 223927a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 224027a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 224127a8da17SBarry Smith of the MatSetValues() operation. 224227a8da17SBarry Smith 224327a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 2244d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 224527a8da17SBarry Smith 224627a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 224727a8da17SBarry Smith 224827a8da17SBarry Smith @*/ 2249be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 225027a8da17SBarry Smith { 2251c1ac3661SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt *); 225227a8da17SBarry Smith 225327a8da17SBarry Smith PetscFunctionBegin; 22544482741eSBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 22554482741eSBarry Smith PetscValidPointer(indices,2); 2256c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 225727a8da17SBarry Smith if (f) { 225827a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 225927a8da17SBarry Smith } else { 2260634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 226127a8da17SBarry Smith } 226227a8da17SBarry Smith PetscFunctionReturn(0); 226327a8da17SBarry Smith } 226427a8da17SBarry Smith 22654a2ae208SSatish Balay #undef __FUNCT__ 2266985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2267985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2268273d9f13SBarry Smith { 2269273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2270dfbe8321SBarry Smith PetscErrorCode ierr; 2271c1ac3661SBarry Smith PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2272273d9f13SBarry Smith PetscReal atmp; 227387828ca2SBarry Smith PetscScalar *x,zero = 0.0; 2274273d9f13SBarry Smith MatScalar *aa; 2275c1ac3661SBarry Smith PetscInt ncols,brow,krow,kcol; 2276273d9f13SBarry Smith 2277273d9f13SBarry Smith PetscFunctionBegin; 2278273d9f13SBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2279899cda47SBarry Smith bs = A->rmap.bs; 2280273d9f13SBarry Smith aa = a->a; 2281273d9f13SBarry Smith ai = a->i; 2282273d9f13SBarry Smith aj = a->j; 2283273d9f13SBarry Smith mbs = a->mbs; 2284273d9f13SBarry Smith 22852dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 2286985db425SBarry Smith if (idx) { 2287985db425SBarry Smith for (i=0; i<A->rmap.n;i++) idx[i] = 0; 2288985db425SBarry Smith } 22891ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2290273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2291899cda47SBarry Smith if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2292273d9f13SBarry Smith for (i=0; i<mbs; i++) { 2293273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 2294273d9f13SBarry Smith brow = bs*i; 2295273d9f13SBarry Smith for (j=0; j<ncols; j++){ 2296273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 2297273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 2298273d9f13SBarry Smith atmp = PetscAbsScalar(*aa);aa++; 2299273d9f13SBarry Smith row = brow + krow; /* row index */ 2300a83599f4SBarry Smith /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2301985db425SBarry Smith if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2302273d9f13SBarry Smith } 2303273d9f13SBarry Smith } 2304273d9f13SBarry Smith aj++; 2305273d9f13SBarry Smith } 2306273d9f13SBarry Smith } 23071ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2308273d9f13SBarry Smith PetscFunctionReturn(0); 2309273d9f13SBarry Smith } 2310273d9f13SBarry Smith 23114a2ae208SSatish Balay #undef __FUNCT__ 23123c896bc6SHong Zhang #define __FUNCT__ "MatCopy_SeqBAIJ" 23133c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 23143c896bc6SHong Zhang { 23153c896bc6SHong Zhang PetscErrorCode ierr; 23163c896bc6SHong Zhang 23173c896bc6SHong Zhang PetscFunctionBegin; 23183c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. */ 23193c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 23203c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 23213c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 23223c896bc6SHong Zhang 2323899cda47SBarry Smith if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 23243c896bc6SHong Zhang SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 23253c896bc6SHong Zhang } 2326899cda47SBarry Smith ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 23273c896bc6SHong Zhang } else { 23283c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 23293c896bc6SHong Zhang } 23303c896bc6SHong Zhang PetscFunctionReturn(0); 23313c896bc6SHong Zhang } 23323c896bc6SHong Zhang 23333c896bc6SHong Zhang #undef __FUNCT__ 23344a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2335dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2336273d9f13SBarry Smith { 2337dfbe8321SBarry Smith PetscErrorCode ierr; 2338273d9f13SBarry Smith 2339273d9f13SBarry Smith PetscFunctionBegin; 23407edd0491SSatish Balay ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2341273d9f13SBarry Smith PetscFunctionReturn(0); 2342273d9f13SBarry Smith } 2343273d9f13SBarry Smith 23444a2ae208SSatish Balay #undef __FUNCT__ 23454a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 2346dfbe8321SBarry Smith PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2347f2a5309cSSatish Balay { 2348f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2349f2a5309cSSatish Balay PetscFunctionBegin; 2350f2a5309cSSatish Balay *array = a->a; 2351f2a5309cSSatish Balay PetscFunctionReturn(0); 2352f2a5309cSSatish Balay } 2353f2a5309cSSatish Balay 23544a2ae208SSatish Balay #undef __FUNCT__ 23554a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2356dfbe8321SBarry Smith PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2357f2a5309cSSatish Balay { 2358f2a5309cSSatish Balay PetscFunctionBegin; 2359f2a5309cSSatish Balay PetscFunctionReturn(0); 2360f2a5309cSSatish Balay } 2361f2a5309cSSatish Balay 236242ee4b1aSHong Zhang #include "petscblaslapack.h" 236342ee4b1aSHong Zhang #undef __FUNCT__ 236442ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ" 2365f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 236642ee4b1aSHong Zhang { 236742ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2368dfbe8321SBarry Smith PetscErrorCode ierr; 2369899cda47SBarry Smith PetscInt i,bs=Y->rmap.bs,j,bs2; 23704ce68768SBarry Smith PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 237142ee4b1aSHong Zhang 237242ee4b1aSHong Zhang PetscFunctionBegin; 237342ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2374f4df32b1SMatthew Knepley PetscScalar alpha = a; 2375f4df32b1SMatthew Knepley BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2376c537a176SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2377c4319e64SHong Zhang if (y->xtoy && y->XtoY != X) { 2378c4319e64SHong Zhang ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2379c4319e64SHong Zhang ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2380c537a176SHong Zhang } 2381c4319e64SHong Zhang if (!y->xtoy) { /* get xtoy */ 2382c4319e64SHong Zhang ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2383c4319e64SHong Zhang y->XtoY = X; 2384c537a176SHong Zhang } 2385c4319e64SHong Zhang bs2 = bs*bs; 2386c537a176SHong Zhang for (i=0; i<x->nz; i++) { 2387c4319e64SHong Zhang j = 0; 2388c4319e64SHong Zhang while (j < bs2){ 2389f4df32b1SMatthew Knepley y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2390c4319e64SHong Zhang j++; 2391c537a176SHong Zhang } 2392c4319e64SHong Zhang } 23931e2582c4SBarry Smith ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 239442ee4b1aSHong Zhang } else { 2395f4df32b1SMatthew Knepley ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 239642ee4b1aSHong Zhang } 239742ee4b1aSHong Zhang PetscFunctionReturn(0); 239842ee4b1aSHong Zhang } 239942ee4b1aSHong Zhang 240099cafbc1SBarry Smith #undef __FUNCT__ 240199cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_SeqBAIJ" 240299cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 240399cafbc1SBarry Smith { 240499cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 240599cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 240699cafbc1SBarry Smith PetscScalar *aa = a->a; 240799cafbc1SBarry Smith 240899cafbc1SBarry Smith PetscFunctionBegin; 240999cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 241099cafbc1SBarry Smith PetscFunctionReturn(0); 241199cafbc1SBarry Smith } 241299cafbc1SBarry Smith 241399cafbc1SBarry Smith #undef __FUNCT__ 241499cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 241599cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 241699cafbc1SBarry Smith { 241799cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 241899cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 241999cafbc1SBarry Smith PetscScalar *aa = a->a; 242099cafbc1SBarry Smith 242199cafbc1SBarry Smith PetscFunctionBegin; 242299cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 242399cafbc1SBarry Smith PetscFunctionReturn(0); 242499cafbc1SBarry Smith } 242599cafbc1SBarry Smith 242699cafbc1SBarry Smith 24272593348eSBarry Smith /* -------------------------------------------------------------------*/ 2428cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2429cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2430cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2431cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 243297304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 24337c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 24347c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2435cc2dc46cSBarry Smith MatSolve_SeqBAIJ_N, 2436cc2dc46cSBarry Smith 0, 2437cc2dc46cSBarry Smith 0, 243897304618SKris Buschelman /*10*/ 0, 2439cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2440cc2dc46cSBarry Smith 0, 2441b6490206SBarry Smith 0, 2442f2501298SSatish Balay MatTranspose_SeqBAIJ, 244397304618SKris Buschelman /*15*/ MatGetInfo_SeqBAIJ, 2444cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2445cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2446cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2447cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 244897304618SKris Buschelman /*20*/ 0, 2449cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2450cc2dc46cSBarry Smith 0, 2451cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2452cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 245397304618SKris Buschelman /*25*/ MatZeroRows_SeqBAIJ, 2454cc2dc46cSBarry Smith MatLUFactorSymbolic_SeqBAIJ, 2455cc2dc46cSBarry Smith MatLUFactorNumeric_SeqBAIJ_N, 2456c05c3958SHong Zhang MatCholeskyFactorSymbolic_SeqBAIJ, 2457c05c3958SHong Zhang MatCholeskyFactorNumeric_SeqBAIJ_N, 245897304618SKris Buschelman /*30*/ MatSetUpPreallocation_SeqBAIJ, 2459cc2dc46cSBarry Smith MatILUFactorSymbolic_SeqBAIJ, 2460c05c3958SHong Zhang MatICCFactorSymbolic_SeqBAIJ, 2461f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 2462f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 246397304618SKris Buschelman /*35*/ MatDuplicate_SeqBAIJ, 2464cc2dc46cSBarry Smith 0, 2465cc2dc46cSBarry Smith 0, 2466cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2467cc2dc46cSBarry Smith 0, 246897304618SKris Buschelman /*40*/ MatAXPY_SeqBAIJ, 2469cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 2470cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2471cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 24723c896bc6SHong Zhang MatCopy_SeqBAIJ, 24738c07d4e3SBarry Smith /*45*/ 0, 2474cc2dc46cSBarry Smith MatScale_SeqBAIJ, 2475cc2dc46cSBarry Smith 0, 2476cc2dc46cSBarry Smith 0, 2477cc2dc46cSBarry Smith 0, 2478521d7252SBarry Smith /*50*/ 0, 24793b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 248092c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 2481cc2dc46cSBarry Smith 0, 2482cc2dc46cSBarry Smith 0, 248397304618SKris Buschelman /*55*/ 0, 2484cc2dc46cSBarry Smith 0, 2485cc2dc46cSBarry Smith 0, 2486cc2dc46cSBarry Smith 0, 2487d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 248897304618SKris Buschelman /*60*/ MatGetSubMatrix_SeqBAIJ, 2489b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2490b9b97703SBarry Smith MatView_SeqBAIJ, 2491357abbc8SBarry Smith 0, 2492273d9f13SBarry Smith 0, 249397304618SKris Buschelman /*65*/ 0, 2494273d9f13SBarry Smith 0, 2495273d9f13SBarry Smith 0, 2496273d9f13SBarry Smith 0, 2497273d9f13SBarry Smith 0, 2498985db425SBarry Smith /*70*/ MatGetRowMaxAbs_SeqBAIJ, 249997304618SKris Buschelman MatConvert_Basic, 2500273d9f13SBarry Smith 0, 250197304618SKris Buschelman 0, 250297304618SKris Buschelman 0, 250397304618SKris Buschelman /*75*/ 0, 250497304618SKris Buschelman 0, 250597304618SKris Buschelman 0, 250697304618SKris Buschelman 0, 250797304618SKris Buschelman 0, 250897304618SKris Buschelman /*80*/ 0, 250997304618SKris Buschelman 0, 251097304618SKris Buschelman 0, 251197304618SKris Buschelman 0, 2512865e5f61SKris Buschelman MatLoad_SeqBAIJ, 2513865e5f61SKris Buschelman /*85*/ 0, 2514b01c7715SBarry Smith 0, 2515b01c7715SBarry Smith 0, 2516b01c7715SBarry Smith 0, 2517865e5f61SKris Buschelman 0, 2518865e5f61SKris Buschelman /*90*/ 0, 2519865e5f61SKris Buschelman 0, 2520865e5f61SKris Buschelman 0, 2521865e5f61SKris Buschelman 0, 2522865e5f61SKris Buschelman 0, 2523865e5f61SKris Buschelman /*95*/ 0, 2524865e5f61SKris Buschelman 0, 2525865e5f61SKris Buschelman 0, 252699cafbc1SBarry Smith 0, 252799cafbc1SBarry Smith 0, 252899cafbc1SBarry Smith /*100*/0, 252999cafbc1SBarry Smith 0, 253099cafbc1SBarry Smith 0, 253199cafbc1SBarry Smith 0, 253299cafbc1SBarry Smith 0, 253399cafbc1SBarry Smith /*105*/0, 253499cafbc1SBarry Smith MatRealPart_SeqBAIJ, 25352af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 25362af78befSBarry Smith 0, 25372af78befSBarry Smith 0, 25382af78befSBarry Smith /*110*/0, 25392af78befSBarry Smith 0, 25402af78befSBarry Smith 0, 25412af78befSBarry Smith 0, 25422af78befSBarry Smith MatMissingDiagonal_SeqBAIJ 25432af78befSBarry Smith /*115*/ 254499cafbc1SBarry Smith }; 25452593348eSBarry Smith 25463e90b805SBarry Smith EXTERN_C_BEGIN 25474a2ae208SSatish Balay #undef __FUNCT__ 25484a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2549be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 25503e90b805SBarry Smith { 25513e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2552899cda47SBarry Smith PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2553dfbe8321SBarry Smith PetscErrorCode ierr; 25543e90b805SBarry Smith 25553e90b805SBarry Smith PetscFunctionBegin; 25563e90b805SBarry Smith if (aij->nonew != 1) { 2557512a5fc5SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 25583e90b805SBarry Smith } 25593e90b805SBarry Smith 25603e90b805SBarry Smith /* allocate space for values if not already there */ 25613e90b805SBarry Smith if (!aij->saved_values) { 256287828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 25633e90b805SBarry Smith } 25643e90b805SBarry Smith 25653e90b805SBarry Smith /* copy values over */ 256687828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 25673e90b805SBarry Smith PetscFunctionReturn(0); 25683e90b805SBarry Smith } 25693e90b805SBarry Smith EXTERN_C_END 25703e90b805SBarry Smith 25713e90b805SBarry Smith EXTERN_C_BEGIN 25724a2ae208SSatish Balay #undef __FUNCT__ 25734a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2574be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 25753e90b805SBarry Smith { 25763e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 25776849ba73SBarry Smith PetscErrorCode ierr; 2578899cda47SBarry Smith PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 25793e90b805SBarry Smith 25803e90b805SBarry Smith PetscFunctionBegin; 25813e90b805SBarry Smith if (aij->nonew != 1) { 2582512a5fc5SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 25833e90b805SBarry Smith } 25843e90b805SBarry Smith if (!aij->saved_values) { 2585634064b4SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 25863e90b805SBarry Smith } 25873e90b805SBarry Smith 25883e90b805SBarry Smith /* copy values over */ 258987828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 25903e90b805SBarry Smith PetscFunctionReturn(0); 25913e90b805SBarry Smith } 25923e90b805SBarry Smith EXTERN_C_END 25933e90b805SBarry Smith 2594273d9f13SBarry Smith EXTERN_C_BEGIN 2595f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2596f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2597273d9f13SBarry Smith EXTERN_C_END 2598273d9f13SBarry Smith 2599273d9f13SBarry Smith EXTERN_C_BEGIN 26004a2ae208SSatish Balay #undef __FUNCT__ 2601a23d5eceSKris Buschelman #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2602be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2603a23d5eceSKris Buschelman { 2604a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 26056849ba73SBarry Smith PetscErrorCode ierr; 2606a96a251dSBarry Smith PetscInt i,mbs,nbs,bs2,newbs = bs; 2607ab93d7beSBarry Smith PetscTruth flg,skipallocation = PETSC_FALSE; 2608a23d5eceSKris Buschelman 2609a23d5eceSKris Buschelman PetscFunctionBegin; 2610a23d5eceSKris Buschelman 2611ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 2612ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 2613ab93d7beSBarry Smith nz = 0; 2614ab93d7beSBarry Smith } 26158c07d4e3SBarry Smith 26167adad957SLisandro Dalcin ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 261717667f90SBarry Smith ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 26188c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 26198c07d4e3SBarry Smith 2620a23d5eceSKris Buschelman if (nnz && newbs != bs) { 2621634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2622a23d5eceSKris Buschelman } 2623a23d5eceSKris Buschelman bs = newbs; 2624a23d5eceSKris Buschelman 2625899cda47SBarry Smith B->rmap.bs = B->cmap.bs = bs; 26266148ca0dSBarry Smith ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 26276148ca0dSBarry Smith ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2628899cda47SBarry Smith 2629899cda47SBarry Smith B->preallocated = PETSC_TRUE; 2630899cda47SBarry Smith 2631899cda47SBarry Smith mbs = B->rmap.n/bs; 2632899cda47SBarry Smith nbs = B->cmap.n/bs; 2633a23d5eceSKris Buschelman bs2 = bs*bs; 2634a23d5eceSKris Buschelman 2635899cda47SBarry Smith if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2636899cda47SBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2637a23d5eceSKris Buschelman } 2638a23d5eceSKris Buschelman 2639a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 264077431f27SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2641a23d5eceSKris Buschelman if (nnz) { 2642a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { 264377431f27SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 264477431f27SBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2645a23d5eceSKris Buschelman } 2646a23d5eceSKris Buschelman } 2647a23d5eceSKris Buschelman 2648a23d5eceSKris Buschelman b = (Mat_SeqBAIJ*)B->data; 26497adad957SLisandro Dalcin ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 26508c07d4e3SBarry Smith ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 26518c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 26528c07d4e3SBarry Smith 2653a23d5eceSKris Buschelman B->ops->solve = MatSolve_SeqBAIJ_Update; 2654a23d5eceSKris Buschelman B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2655a23d5eceSKris Buschelman if (!flg) { 2656a23d5eceSKris Buschelman switch (bs) { 2657a23d5eceSKris Buschelman case 1: 2658a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2659a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 2660a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2661*6d3beeddSMatthew Knepley B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2662a23d5eceSKris Buschelman break; 2663a23d5eceSKris Buschelman case 2: 2664a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2665a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 2666a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2667b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2668a23d5eceSKris Buschelman break; 2669a23d5eceSKris Buschelman case 3: 2670a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2671a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 2672a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2673b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2674a23d5eceSKris Buschelman break; 2675a23d5eceSKris Buschelman case 4: 2676a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2677a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 2678a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2679b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2680a23d5eceSKris Buschelman break; 2681a23d5eceSKris Buschelman case 5: 2682a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2683a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 2684a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2685b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2686a23d5eceSKris Buschelman break; 2687a23d5eceSKris Buschelman case 6: 2688a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2689a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 2690a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2691*6d3beeddSMatthew Knepley B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2692a23d5eceSKris Buschelman break; 2693a23d5eceSKris Buschelman case 7: 2694a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2695a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 2696a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2697*6d3beeddSMatthew Knepley B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2698a23d5eceSKris Buschelman break; 2699a23d5eceSKris Buschelman default: 2700a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2701a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 2702a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2703a23d5eceSKris Buschelman break; 2704a23d5eceSKris Buschelman } 2705a23d5eceSKris Buschelman } 2706899cda47SBarry Smith B->rmap.bs = bs; 2707a23d5eceSKris Buschelman b->mbs = mbs; 2708a23d5eceSKris Buschelman b->nbs = nbs; 2709ab93d7beSBarry Smith if (!skipallocation) { 27102ee49352SLisandro Dalcin if (!b->imax) { 2711ab93d7beSBarry Smith ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 27122ee49352SLisandro Dalcin } 2713ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 2714ab93d7beSBarry Smith for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2715a23d5eceSKris Buschelman if (!nnz) { 2716a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2717a23d5eceSKris Buschelman else if (nz <= 0) nz = 1; 2718a23d5eceSKris Buschelman for (i=0; i<mbs; i++) b->imax[i] = nz; 2719a23d5eceSKris Buschelman nz = nz*mbs; 2720a23d5eceSKris Buschelman } else { 2721a23d5eceSKris Buschelman nz = 0; 2722a23d5eceSKris Buschelman for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2723a23d5eceSKris Buschelman } 2724a23d5eceSKris Buschelman 2725a23d5eceSKris Buschelman /* allocate the matrix space */ 27262ee49352SLisandro Dalcin ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2727899cda47SBarry Smith ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 27282ee49352SLisandro Dalcin ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2729a23d5eceSKris Buschelman ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2730c1ac3661SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2731a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 2732a23d5eceSKris Buschelman b->i[0] = 0; 2733a23d5eceSKris Buschelman for (i=1; i<mbs+1; i++) { 2734a23d5eceSKris Buschelman b->i[i] = b->i[i-1] + b->imax[i-1]; 2735a23d5eceSKris Buschelman } 2736e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2737e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2738e811da20SHong Zhang } else { 2739e6b907acSBarry Smith b->free_a = PETSC_FALSE; 2740e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 2741ab93d7beSBarry Smith } 2742a23d5eceSKris Buschelman 2743899cda47SBarry Smith B->rmap.bs = bs; 2744a23d5eceSKris Buschelman b->bs2 = bs2; 2745a23d5eceSKris Buschelman b->mbs = mbs; 2746a23d5eceSKris Buschelman b->nz = 0; 2747a23d5eceSKris Buschelman b->maxnz = nz*bs2; 2748a23d5eceSKris Buschelman B->info.nz_unneeded = (PetscReal)b->maxnz; 2749a23d5eceSKris Buschelman PetscFunctionReturn(0); 2750a23d5eceSKris Buschelman } 2751a23d5eceSKris Buschelman EXTERN_C_END 2752a23d5eceSKris Buschelman 27530bad9183SKris Buschelman /*MC 2754fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 27550bad9183SKris Buschelman block sparse compressed row format. 27560bad9183SKris Buschelman 27570bad9183SKris Buschelman Options Database Keys: 27580bad9183SKris Buschelman . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 27590bad9183SKris Buschelman 27600bad9183SKris Buschelman Level: beginner 27610bad9183SKris Buschelman 2762f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ() 27630bad9183SKris Buschelman M*/ 27640bad9183SKris Buschelman 2765a23d5eceSKris Buschelman EXTERN_C_BEGIN 2766a23d5eceSKris Buschelman #undef __FUNCT__ 27674a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 2768be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 27692593348eSBarry Smith { 2770dfbe8321SBarry Smith PetscErrorCode ierr; 2771c1ac3661SBarry Smith PetscMPIInt size; 2772b6490206SBarry Smith Mat_SeqBAIJ *b; 27733b2fbd54SBarry Smith 27743a40ed3dSBarry Smith PetscFunctionBegin; 27757adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 277629bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2777b6490206SBarry Smith 277838f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2779b0a32e0cSBarry Smith B->data = (void*)b; 2780549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 27812593348eSBarry Smith B->factor = 0; 278290f02eecSBarry Smith B->mapping = 0; 27832593348eSBarry Smith b->row = 0; 27842593348eSBarry Smith b->col = 0; 2785e51c0b9cSSatish Balay b->icol = 0; 27862593348eSBarry Smith b->reallocs = 0; 27873e90b805SBarry Smith b->saved_values = 0; 2788cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2789563b5814SBarry Smith b->setvalueslen = 0; 2790563b5814SBarry Smith b->setvaluescopy = PETSC_NULL; 2791563b5814SBarry Smith #endif 27922593348eSBarry Smith 2793c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 27942593348eSBarry Smith b->nonew = 0; 27952593348eSBarry Smith b->diag = 0; 27962593348eSBarry Smith b->solve_work = 0; 2797de6a44a3SBarry Smith b->mult_work = 0; 27982a1b7f2aSHong Zhang B->spptr = 0; 27990e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 2800883fce79SBarry Smith b->keepzeroedrows = PETSC_FALSE; 2801c4319e64SHong Zhang b->xtoy = 0; 2802c4319e64SHong Zhang b->XtoY = 0; 280373e7a558SHong Zhang b->compressedrow.use = PETSC_FALSE; 280426e093fcSHong Zhang b->compressedrow.nrows = 0; 280573e7a558SHong Zhang b->compressedrow.i = PETSC_NULL; 280673e7a558SHong Zhang b->compressedrow.rindex = PETSC_NULL; 280773e7a558SHong Zhang b->compressedrow.checked = PETSC_FALSE; 280888e51ccdSHong Zhang B->same_nonzero = PETSC_FALSE; 28094e220ebcSLois Curfman McInnes 281043516a2dSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 281143516a2dSKris Buschelman "MatInvertBlockDiagonal_SeqBAIJ", 281243516a2dSKris Buschelman MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2813f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 28143e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 2815bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2816f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 28173e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 2818bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2819f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 282027a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2821bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2822a6175056SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2823273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 2824273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2825a0e1a404SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2826a0e1a404SHong Zhang "MatConvert_SeqBAIJ_SeqSBAIJ", 2827a0e1a404SHong Zhang MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2828a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2829a23d5eceSKris Buschelman "MatSeqBAIJSetPreallocation_SeqBAIJ", 2830a23d5eceSKris Buschelman MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 283117667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 28323a40ed3dSBarry Smith PetscFunctionReturn(0); 28332593348eSBarry Smith } 2834273d9f13SBarry Smith EXTERN_C_END 28352593348eSBarry Smith 28364a2ae208SSatish Balay #undef __FUNCT__ 28374a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2838dfbe8321SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 28392593348eSBarry Smith { 28402593348eSBarry Smith Mat C; 2841b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 28426849ba73SBarry Smith PetscErrorCode ierr; 2843a96a251dSBarry Smith PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2844de6a44a3SBarry Smith 28453a40ed3dSBarry Smith PetscFunctionBegin; 284629bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 28472593348eSBarry Smith 28482593348eSBarry Smith *B = 0; 28497adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2850899cda47SBarry Smith ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 28517adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 28521d5dac46SHong Zhang ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2853273d9f13SBarry Smith c = (Mat_SeqBAIJ*)C->data; 285444cd7ae7SLois Curfman McInnes 2855899cda47SBarry Smith C->rmap.N = A->rmap.N; 2856899cda47SBarry Smith C->cmap.N = A->cmap.N; 2857899cda47SBarry Smith C->rmap.bs = A->rmap.bs; 28589df24120SSatish Balay c->bs2 = a->bs2; 2859b6490206SBarry Smith c->mbs = a->mbs; 2860b6490206SBarry Smith c->nbs = a->nbs; 28612593348eSBarry Smith 286233b91e9fSSatish Balay ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2863b6490206SBarry Smith for (i=0; i<mbs; i++) { 28642593348eSBarry Smith c->imax[i] = a->imax[i]; 28652593348eSBarry Smith c->ilen[i] = a->ilen[i]; 28662593348eSBarry Smith } 28672593348eSBarry Smith 28682593348eSBarry Smith /* allocate the matrix space */ 2869a96a251dSBarry Smith ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2870c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 2871c1ac3661SBarry Smith ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2872b6490206SBarry Smith if (mbs > 0) { 2873c1ac3661SBarry Smith ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 28742e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 2875549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 28762e8a6d31SBarry Smith } else { 2877549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 28782593348eSBarry Smith } 28792593348eSBarry Smith } 28802593348eSBarry Smith c->roworiented = a->roworiented; 28812593348eSBarry Smith c->nonew = a->nonew; 28822593348eSBarry Smith 28832593348eSBarry Smith if (a->diag) { 2884c1ac3661SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 288552e6d16bSBarry Smith ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2886b6490206SBarry Smith for (i=0; i<mbs; i++) { 28872593348eSBarry Smith c->diag[i] = a->diag[i]; 28882593348eSBarry Smith } 288998305bb5SBarry Smith } else c->diag = 0; 28902593348eSBarry Smith c->nz = a->nz; 28912593348eSBarry Smith c->maxnz = a->maxnz; 28922593348eSBarry Smith c->solve_work = 0; 28937fc0212eSBarry Smith c->mult_work = 0; 2894e6b907acSBarry Smith c->free_a = PETSC_TRUE; 2895e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 2896273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 2897273d9f13SBarry Smith C->assembled = PETSC_TRUE; 289888e51ccdSHong Zhang 289988e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 290088e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 290188e51ccdSHong Zhang c->compressedrow.checked = a->compressedrow.checked; 290288e51ccdSHong Zhang if ( a->compressedrow.checked && a->compressedrow.use){ 290388e51ccdSHong Zhang i = a->compressedrow.nrows; 290488e51ccdSHong Zhang ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 290588e51ccdSHong Zhang c->compressedrow.rindex = c->compressedrow.i + i + 1; 290688e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 290788e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 290888e51ccdSHong Zhang } else { 290988e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 291088e51ccdSHong Zhang c->compressedrow.i = PETSC_NULL; 291188e51ccdSHong Zhang c->compressedrow.rindex = PETSC_NULL; 291288e51ccdSHong Zhang } 291388e51ccdSHong Zhang C->same_nonzero = A->same_nonzero; 29142593348eSBarry Smith *B = C; 29157adad957SLisandro Dalcin ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 29163a40ed3dSBarry Smith PetscFunctionReturn(0); 29172593348eSBarry Smith } 29182593348eSBarry Smith 29194a2ae208SSatish Balay #undef __FUNCT__ 29204a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 2921f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 29222593348eSBarry Smith { 2923b6490206SBarry Smith Mat_SeqBAIJ *a; 29242593348eSBarry Smith Mat B; 29256849ba73SBarry Smith PetscErrorCode ierr; 2926b24ad042SBarry Smith PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2927c1ac3661SBarry Smith PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2928c1ac3661SBarry Smith PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2929c1ac3661SBarry Smith PetscInt *masked,nmask,tmp,bs2,ishift; 2930b24ad042SBarry Smith PetscMPIInt size; 2931b24ad042SBarry Smith int fd; 293287828ca2SBarry Smith PetscScalar *aa; 293319bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 29342593348eSBarry Smith 29353a40ed3dSBarry Smith PetscFunctionBegin; 29368c07d4e3SBarry Smith ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 29378c07d4e3SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 29388c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2939de6a44a3SBarry Smith bs2 = bs*bs; 2940b6490206SBarry Smith 2941d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 294229bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2943b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 29440752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2945552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 29462593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 29472593348eSBarry Smith 2948d64ed03dSBarry Smith if (header[3] < 0) { 294929bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2950d64ed03dSBarry Smith } 2951d64ed03dSBarry Smith 295229bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 295335aab85fSBarry Smith 295435aab85fSBarry Smith /* 295535aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 295635aab85fSBarry Smith divisible by the blocksize 295735aab85fSBarry Smith */ 2958b6490206SBarry Smith mbs = M/bs; 295935aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 296035aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 296135aab85fSBarry Smith else mbs++; 296235aab85fSBarry Smith if (extra_rows) { 29631e2582c4SBarry Smith ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 296435aab85fSBarry Smith } 2965b6490206SBarry Smith 29662593348eSBarry Smith /* read in row lengths */ 2967c1ac3661SBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 29680752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 296935aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 29702593348eSBarry Smith 2971b6490206SBarry Smith /* read in column indices */ 2972c1ac3661SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 29730752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 297435aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2975b6490206SBarry Smith 2976b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2977c1ac3661SBarry Smith ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2978c1ac3661SBarry Smith ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2979c1ac3661SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2980c1ac3661SBarry Smith ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 298135aab85fSBarry Smith masked = mask + mbs; 2982b6490206SBarry Smith rowcount = 0; nzcount = 0; 2983b6490206SBarry Smith for (i=0; i<mbs; i++) { 298435aab85fSBarry Smith nmask = 0; 2985b6490206SBarry Smith for (j=0; j<bs; j++) { 2986b6490206SBarry Smith kmax = rowlengths[rowcount]; 2987b6490206SBarry Smith for (k=0; k<kmax; k++) { 298835aab85fSBarry Smith tmp = jj[nzcount++]/bs; 298935aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2990b6490206SBarry Smith } 2991b6490206SBarry Smith rowcount++; 2992b6490206SBarry Smith } 299335aab85fSBarry Smith browlengths[i] += nmask; 299435aab85fSBarry Smith /* zero out the mask elements we set */ 299535aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2996b6490206SBarry Smith } 2997b6490206SBarry Smith 29982593348eSBarry Smith /* create our matrix */ 2999f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&B); 3000f69a0ea3SMatthew Knepley ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 300178ae41b4SKris Buschelman ierr = MatSetType(B,type);CHKERRQ(ierr); 3002ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3003b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 30042593348eSBarry Smith 30052593348eSBarry Smith /* set matrix "i" values */ 3006de6a44a3SBarry Smith a->i[0] = 0; 3007b6490206SBarry Smith for (i=1; i<= mbs; i++) { 3008b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 3009b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 30102593348eSBarry Smith } 3011b6490206SBarry Smith a->nz = 0; 3012b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 30132593348eSBarry Smith 3014b6490206SBarry Smith /* read in nonzero values */ 301587828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 30160752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 301735aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3018b6490206SBarry Smith 3019b6490206SBarry Smith /* set "a" and "j" values into matrix */ 3020b6490206SBarry Smith nzcount = 0; jcount = 0; 3021b6490206SBarry Smith for (i=0; i<mbs; i++) { 3022b6490206SBarry Smith nzcountb = nzcount; 302335aab85fSBarry Smith nmask = 0; 3024b6490206SBarry Smith for (j=0; j<bs; j++) { 3025b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 3026b6490206SBarry Smith for (k=0; k<kmax; k++) { 302735aab85fSBarry Smith tmp = jj[nzcount++]/bs; 302835aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3029b6490206SBarry Smith } 3030b6490206SBarry Smith } 3031de6a44a3SBarry Smith /* sort the masked values */ 3032433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3033de6a44a3SBarry Smith 3034b6490206SBarry Smith /* set "j" values into matrix */ 3035b6490206SBarry Smith maskcount = 1; 303635aab85fSBarry Smith for (j=0; j<nmask; j++) { 303735aab85fSBarry Smith a->j[jcount++] = masked[j]; 3038de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 3039b6490206SBarry Smith } 3040b6490206SBarry Smith /* set "a" values into matrix */ 3041de6a44a3SBarry Smith ishift = bs2*a->i[i]; 3042b6490206SBarry Smith for (j=0; j<bs; j++) { 3043b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 3044b6490206SBarry Smith for (k=0; k<kmax; k++) { 3045de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 3046de6a44a3SBarry Smith block = mask[tmp] - 1; 3047de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 3048de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 3049375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 3050b6490206SBarry Smith } 3051b6490206SBarry Smith } 305235aab85fSBarry Smith /* zero out the mask elements we set */ 305335aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3054b6490206SBarry Smith } 305529bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3056b6490206SBarry Smith 3057606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3058606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 3059606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 3060606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 3061606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 3062b6490206SBarry Smith 306378ae41b4SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 306478ae41b4SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 30659c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 306678ae41b4SKris Buschelman 306778ae41b4SKris Buschelman *A = B; 30683a40ed3dSBarry Smith PetscFunctionReturn(0); 30692593348eSBarry Smith } 30702593348eSBarry Smith 30714a2ae208SSatish Balay #undef __FUNCT__ 30724a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 3073273d9f13SBarry Smith /*@C 3074273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3075273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 3076273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3077273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3078273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 30792593348eSBarry Smith 3080273d9f13SBarry Smith Collective on MPI_Comm 3081273d9f13SBarry Smith 3082273d9f13SBarry Smith Input Parameters: 3083273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 3084273d9f13SBarry Smith . bs - size of block 3085273d9f13SBarry Smith . m - number of rows 3086273d9f13SBarry Smith . n - number of columns 308735d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 308835d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 3089273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 3090273d9f13SBarry Smith 3091273d9f13SBarry Smith Output Parameter: 3092273d9f13SBarry Smith . A - the matrix 3093273d9f13SBarry Smith 3094175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3095175b88e8SBarry Smith MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3096175b88e8SBarry Smith true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3097175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3098175b88e8SBarry Smith 3099273d9f13SBarry Smith Options Database Keys: 3100273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 3101273d9f13SBarry Smith block calculations (much slower) 3102273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 3103273d9f13SBarry Smith 3104273d9f13SBarry Smith Level: intermediate 3105273d9f13SBarry Smith 3106273d9f13SBarry Smith Notes: 3107d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3108d1be2dadSMatthew Knepley 310949a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 311049a6f317SBarry Smith 311135d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 311235d8aa7fSBarry Smith 3113273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3114273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3115273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3116273d9f13SBarry Smith 3117273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 3118273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3119273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 3120273d9f13SBarry Smith matrices. 3121273d9f13SBarry Smith 3122273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3123273d9f13SBarry Smith @*/ 3124be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3125273d9f13SBarry Smith { 3126dfbe8321SBarry Smith PetscErrorCode ierr; 3127273d9f13SBarry Smith 3128273d9f13SBarry Smith PetscFunctionBegin; 3129f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3130f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3131273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3132ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3133273d9f13SBarry Smith PetscFunctionReturn(0); 3134273d9f13SBarry Smith } 3135273d9f13SBarry Smith 31364a2ae208SSatish Balay #undef __FUNCT__ 31374a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3138273d9f13SBarry Smith /*@C 3139273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3140273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 3141273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3142273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3143273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 3144273d9f13SBarry Smith 3145273d9f13SBarry Smith Collective on MPI_Comm 3146273d9f13SBarry Smith 3147273d9f13SBarry Smith Input Parameters: 3148273d9f13SBarry Smith + A - the matrix 3149273d9f13SBarry Smith . bs - size of block 3150273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 3151273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 3152273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 3153273d9f13SBarry Smith 3154273d9f13SBarry Smith Options Database Keys: 3155273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 3156273d9f13SBarry Smith block calculations (much slower) 3157273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 3158273d9f13SBarry Smith 3159273d9f13SBarry Smith Level: intermediate 3160273d9f13SBarry Smith 3161273d9f13SBarry Smith Notes: 316249a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 316349a6f317SBarry Smith 3164aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3165aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3166aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3167aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3168aa95bbe8SBarry Smith 3169273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3170273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3171273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3172273d9f13SBarry Smith 3173273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 3174273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3175273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 3176273d9f13SBarry Smith matrices. 3177273d9f13SBarry Smith 3178aa95bbe8SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3179273d9f13SBarry Smith @*/ 3180be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3181273d9f13SBarry Smith { 3182c1ac3661SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3183273d9f13SBarry Smith 3184273d9f13SBarry Smith PetscFunctionBegin; 3185a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3186a23d5eceSKris Buschelman if (f) { 3187a23d5eceSKris Buschelman ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3188273d9f13SBarry Smith } 3189273d9f13SBarry Smith PetscFunctionReturn(0); 3190273d9f13SBarry Smith } 3191a1d92eedSBarry Smith 3192c75a6043SHong Zhang #undef __FUNCT__ 3193c75a6043SHong Zhang #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3194c75a6043SHong Zhang /*@ 3195c75a6043SHong Zhang MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3196c75a6043SHong Zhang (upper triangular entries in CSR format) provided by the user. 3197c75a6043SHong Zhang 3198c75a6043SHong Zhang Collective on MPI_Comm 3199c75a6043SHong Zhang 3200c75a6043SHong Zhang Input Parameters: 3201c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3202c75a6043SHong Zhang . bs - size of block 3203c75a6043SHong Zhang . m - number of rows 3204c75a6043SHong Zhang . n - number of columns 3205c75a6043SHong Zhang . i - row indices 3206c75a6043SHong Zhang . j - column indices 3207c75a6043SHong Zhang - a - matrix values 3208c75a6043SHong Zhang 3209c75a6043SHong Zhang Output Parameter: 3210c75a6043SHong Zhang . mat - the matrix 3211c75a6043SHong Zhang 3212c75a6043SHong Zhang Level: intermediate 3213c75a6043SHong Zhang 3214c75a6043SHong Zhang Notes: 3215c75a6043SHong Zhang The i, j, and a arrays are not copied by this routine, the user must free these arrays 3216c75a6043SHong Zhang once the matrix is destroyed 3217c75a6043SHong Zhang 3218c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3219c75a6043SHong Zhang 3220c75a6043SHong Zhang The i and j indices are 0 based 3221c75a6043SHong Zhang 3222c75a6043SHong Zhang .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3223c75a6043SHong Zhang 3224c75a6043SHong Zhang @*/ 3225c75a6043SHong Zhang PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3226c75a6043SHong Zhang { 3227c75a6043SHong Zhang PetscErrorCode ierr; 3228c75a6043SHong Zhang PetscInt ii; 3229c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3230c75a6043SHong Zhang 3231c75a6043SHong Zhang PetscFunctionBegin; 3232c75a6043SHong Zhang if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3233c75a6043SHong Zhang if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3234c75a6043SHong Zhang 3235c75a6043SHong Zhang ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3236c75a6043SHong Zhang ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3237c75a6043SHong Zhang ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3238c75a6043SHong Zhang ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3239c75a6043SHong Zhang baij = (Mat_SeqBAIJ*)(*mat)->data; 3240c75a6043SHong Zhang ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3241c75a6043SHong Zhang 3242c75a6043SHong Zhang baij->i = i; 3243c75a6043SHong Zhang baij->j = j; 3244c75a6043SHong Zhang baij->a = a; 3245c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3246c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3247e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3248e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3249c75a6043SHong Zhang 3250c75a6043SHong Zhang for (ii=0; ii<m; ii++) { 3251c75a6043SHong Zhang baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3252c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG) 3253c75a6043SHong Zhang if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3254c75a6043SHong Zhang #endif 3255c75a6043SHong Zhang } 3256c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG) 3257c75a6043SHong Zhang for (ii=0; ii<baij->i[m]; ii++) { 3258c75a6043SHong Zhang if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3259c75a6043SHong Zhang if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3260c75a6043SHong Zhang } 3261c75a6043SHong Zhang #endif 3262c75a6043SHong Zhang 3263c75a6043SHong Zhang ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264c75a6043SHong Zhang ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265c75a6043SHong Zhang PetscFunctionReturn(0); 3266c75a6043SHong Zhang } 3267