12593348eSBarry Smith /* 2b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 32593348eSBarry Smith matrix storage format. 42593348eSBarry Smith */ 570f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 61a6a6d98SBarry Smith #include "src/inline/spops.h" 7325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 83b547af2SSatish Balay 9b01c7715SBarry Smith #include "src/inline/ilu.h" 10b01c7715SBarry Smith 11b01c7715SBarry Smith #undef __FUNCT__ 12*43516a2dSKris Buschelman #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13*43516a2dSKris Buschelman /*@C 14*43516a2dSKris Buschelman MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15*43516a2dSKris Buschelman 16*43516a2dSKris Buschelman Collective on Mat 17*43516a2dSKris Buschelman 18*43516a2dSKris Buschelman Input Parameters: 19*43516a2dSKris Buschelman . mat - the matrix 20*43516a2dSKris Buschelman 21*43516a2dSKris Buschelman Level: advanced 22*43516a2dSKris Buschelman @*/ 23*43516a2dSKris Buschelman PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 24*43516a2dSKris Buschelman { 25*43516a2dSKris Buschelman PetscErrorCode ierr,(*f)(Mat); 26*43516a2dSKris Buschelman 27*43516a2dSKris Buschelman PetscFunctionBegin; 28*43516a2dSKris Buschelman PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 29*43516a2dSKris Buschelman if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30*43516a2dSKris Buschelman if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31*43516a2dSKris Buschelman 32*43516a2dSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 33*43516a2dSKris Buschelman if (f) { 34*43516a2dSKris Buschelman ierr = (*f)(mat);CHKERRQ(ierr); 35*43516a2dSKris Buschelman } else { 36*43516a2dSKris Buschelman SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 37*43516a2dSKris Buschelman } 38*43516a2dSKris Buschelman PetscFunctionReturn(0); 39*43516a2dSKris Buschelman } 40*43516a2dSKris Buschelman 41*43516a2dSKris Buschelman EXTERN_C_BEGIN 42*43516a2dSKris Buschelman #undef __FUNCT__ 43b01c7715SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 44dfbe8321SBarry Smith PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 45b01c7715SBarry Smith { 46b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 476849ba73SBarry Smith PetscErrorCode ierr; 48521d7252SBarry Smith PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 49b01c7715SBarry Smith PetscScalar *v = a->a,*odiag,*diag,*mdiag; 50b01c7715SBarry Smith 51b01c7715SBarry Smith PetscFunctionBegin; 52b01c7715SBarry Smith if (a->idiagvalid) PetscFunctionReturn(0); 53b01c7715SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 54b01c7715SBarry Smith diag_offset = a->diag; 55b01c7715SBarry Smith if (!a->idiag) { 56b01c7715SBarry Smith ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 57b01c7715SBarry Smith } 58b01c7715SBarry Smith diag = a->idiag; 59b01c7715SBarry Smith mdiag = a->idiag+bs*bs*mbs; 60b01c7715SBarry Smith /* factor and invert each block */ 61521d7252SBarry Smith switch (bs){ 62b01c7715SBarry Smith case 2: 63b01c7715SBarry Smith for (i=0; i<mbs; i++) { 64b01c7715SBarry Smith odiag = v + 4*diag_offset[i]; 65b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 66b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 67b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 68b01c7715SBarry Smith diag += 4; 69b01c7715SBarry Smith mdiag += 4; 70b01c7715SBarry Smith } 71b01c7715SBarry Smith break; 72b01c7715SBarry Smith case 3: 73b01c7715SBarry Smith for (i=0; i<mbs; i++) { 74b01c7715SBarry Smith odiag = v + 9*diag_offset[i]; 75b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 76b01c7715SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 77b01c7715SBarry Smith diag[8] = odiag[8]; 78b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79b01c7715SBarry Smith mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 80b01c7715SBarry Smith mdiag[8] = odiag[8]; 81b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 82b01c7715SBarry Smith diag += 9; 83b01c7715SBarry Smith mdiag += 9; 84b01c7715SBarry Smith } 85b01c7715SBarry Smith break; 86b01c7715SBarry Smith case 4: 87b01c7715SBarry Smith for (i=0; i<mbs; i++) { 88b01c7715SBarry Smith odiag = v + 16*diag_offset[i]; 89b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 90b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 91b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 92b01c7715SBarry Smith diag += 16; 93b01c7715SBarry Smith mdiag += 16; 94b01c7715SBarry Smith } 95b01c7715SBarry Smith break; 96b01c7715SBarry Smith case 5: 97b01c7715SBarry Smith for (i=0; i<mbs; i++) { 98b01c7715SBarry Smith odiag = v + 25*diag_offset[i]; 99b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 100b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 101b01c7715SBarry Smith ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 102b01c7715SBarry Smith diag += 25; 103b01c7715SBarry Smith mdiag += 25; 104b01c7715SBarry Smith } 105b01c7715SBarry Smith break; 106b01c7715SBarry Smith default: 107521d7252SBarry Smith SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 108b01c7715SBarry Smith } 109b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 110b01c7715SBarry Smith PetscFunctionReturn(0); 111b01c7715SBarry Smith } 112*43516a2dSKris Buschelman EXTERN_C_END 113b01c7715SBarry Smith 114b01c7715SBarry Smith #undef __FUNCT__ 115b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 116c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 117b01c7715SBarry Smith { 118b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 119b01c7715SBarry Smith PetscScalar *x,x1,x2,s1,s2; 120b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 121dfbe8321SBarry Smith PetscErrorCode ierr; 122c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 123c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 124b01c7715SBarry Smith 125b01c7715SBarry Smith PetscFunctionBegin; 126b01c7715SBarry Smith its = its*lits; 12777431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 128b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 129b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 130b01c7715SBarry Smith if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 131b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 132b01c7715SBarry Smith 133b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 134b01c7715SBarry Smith 135b01c7715SBarry Smith diag = a->diag; 136b01c7715SBarry Smith idiag = a->idiag; 1371ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1381ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 139b01c7715SBarry Smith 140b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 141b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 142b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 143b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 144b01c7715SBarry Smith i2 = 2; 145b01c7715SBarry Smith idiag += 4; 146b01c7715SBarry Smith for (i=1; i<m; i++) { 147b01c7715SBarry Smith v = aa + 4*ai[i]; 148b01c7715SBarry Smith vi = aj + ai[i]; 149b01c7715SBarry Smith nz = diag[i] - ai[i]; 150b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; 151b01c7715SBarry Smith while (nz--) { 152b01c7715SBarry Smith idx = 2*(*vi++); 153b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 154b01c7715SBarry Smith s1 -= v[0]*x1 + v[2]*x2; 155b01c7715SBarry Smith s2 -= v[1]*x1 + v[3]*x2; 156b01c7715SBarry Smith v += 4; 157b01c7715SBarry Smith } 158b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[2]*s2; 159b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 160b01c7715SBarry Smith idiag += 4; 161b01c7715SBarry Smith i2 += 2; 162b01c7715SBarry Smith } 163b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 164efee365bSSatish Balay ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 165b01c7715SBarry Smith } 166b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 167b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 168b01c7715SBarry Smith i2 = 0; 169b01c7715SBarry Smith mdiag = a->idiag+4*a->mbs; 170b01c7715SBarry Smith for (i=0; i<m; i++) { 171b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; 172b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 173b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 174b01c7715SBarry Smith mdiag += 4; 175b01c7715SBarry Smith i2 += 2; 176b01c7715SBarry Smith } 177efee365bSSatish Balay ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 178b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 179b01c7715SBarry Smith ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 180b01c7715SBarry Smith } 181b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 182b01c7715SBarry Smith idiag = a->idiag+4*a->mbs - 4; 183b01c7715SBarry Smith i2 = 2*m - 2; 184b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; 185b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[2]*x2; 186b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 187b01c7715SBarry Smith idiag -= 4; 188b01c7715SBarry Smith i2 -= 2; 189b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 190b01c7715SBarry Smith v = aa + 4*(diag[i]+1); 191b01c7715SBarry Smith vi = aj + diag[i] + 1; 192b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 193b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; 194b01c7715SBarry Smith while (nz--) { 195b01c7715SBarry Smith idx = 2*(*vi++); 196b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 197b01c7715SBarry Smith s1 -= v[0]*x1 + v[2]*x2; 198b01c7715SBarry Smith s2 -= v[1]*x1 + v[3]*x2; 199b01c7715SBarry Smith v += 4; 200b01c7715SBarry Smith } 201b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[2]*s2; 202b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 203b01c7715SBarry Smith idiag -= 4; 204b01c7715SBarry Smith i2 -= 2; 205b01c7715SBarry Smith } 206efee365bSSatish Balay ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 207b01c7715SBarry Smith } 208b01c7715SBarry Smith } else { 209634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 210b01c7715SBarry Smith } 2111ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2121ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 213b01c7715SBarry Smith PetscFunctionReturn(0); 214b01c7715SBarry Smith } 215b01c7715SBarry Smith 216b01c7715SBarry Smith #undef __FUNCT__ 217b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 218c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 219b01c7715SBarry Smith { 220b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 221b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,s1,s2,s3; 222b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 223dfbe8321SBarry Smith PetscErrorCode ierr; 224c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 225c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 226b01c7715SBarry Smith 227b01c7715SBarry Smith PetscFunctionBegin; 228b01c7715SBarry Smith its = its*lits; 22977431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232b01c7715SBarry Smith if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 233b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234b01c7715SBarry Smith 235b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236b01c7715SBarry Smith 237b01c7715SBarry Smith diag = a->diag; 238b01c7715SBarry Smith idiag = a->idiag; 2391ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2401ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241b01c7715SBarry Smith 242b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 243b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 245b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 246b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 247b01c7715SBarry Smith i2 = 3; 248b01c7715SBarry Smith idiag += 9; 249b01c7715SBarry Smith for (i=1; i<m; i++) { 250b01c7715SBarry Smith v = aa + 9*ai[i]; 251b01c7715SBarry Smith vi = aj + ai[i]; 252b01c7715SBarry Smith nz = diag[i] - ai[i]; 253b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 254b01c7715SBarry Smith while (nz--) { 255b01c7715SBarry Smith idx = 3*(*vi++); 256b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 257b01c7715SBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 258b01c7715SBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 259b01c7715SBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 260b01c7715SBarry Smith v += 9; 261b01c7715SBarry Smith } 262b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 263b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 264b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 265b01c7715SBarry Smith idiag += 9; 266b01c7715SBarry Smith i2 += 3; 267b01c7715SBarry Smith } 268b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 269efee365bSSatish Balay ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 270b01c7715SBarry Smith } 271b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 272b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 273b01c7715SBarry Smith i2 = 0; 274b01c7715SBarry Smith mdiag = a->idiag+9*a->mbs; 275b01c7715SBarry Smith for (i=0; i<m; i++) { 276b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 277b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 278b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 279b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 280b01c7715SBarry Smith mdiag += 9; 281b01c7715SBarry Smith i2 += 3; 282b01c7715SBarry Smith } 283efee365bSSatish Balay ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 284b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285b01c7715SBarry Smith ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 286b01c7715SBarry Smith } 287b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 288b01c7715SBarry Smith idiag = a->idiag+9*a->mbs - 9; 289b01c7715SBarry Smith i2 = 3*m - 3; 290b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 291b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 292b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 293b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 294b01c7715SBarry Smith idiag -= 9; 295b01c7715SBarry Smith i2 -= 3; 296b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 297b01c7715SBarry Smith v = aa + 9*(diag[i]+1); 298b01c7715SBarry Smith vi = aj + diag[i] + 1; 299b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 300b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 301b01c7715SBarry Smith while (nz--) { 302b01c7715SBarry Smith idx = 3*(*vi++); 303b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 304b01c7715SBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 305b01c7715SBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 306b01c7715SBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 307b01c7715SBarry Smith v += 9; 308b01c7715SBarry Smith } 309b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 310b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 311b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 312b01c7715SBarry Smith idiag -= 9; 313b01c7715SBarry Smith i2 -= 3; 314b01c7715SBarry Smith } 315efee365bSSatish Balay ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 316b01c7715SBarry Smith } 317b01c7715SBarry Smith } else { 318634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 319b01c7715SBarry Smith } 3201ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3211ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 322b01c7715SBarry Smith PetscFunctionReturn(0); 323b01c7715SBarry Smith } 324b01c7715SBarry Smith 325b01c7715SBarry Smith #undef __FUNCT__ 326b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 327c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 328b01c7715SBarry Smith { 329b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 330b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 331b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 332dfbe8321SBarry Smith PetscErrorCode ierr; 333c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 334c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 335b01c7715SBarry Smith 336b01c7715SBarry Smith PetscFunctionBegin; 337b01c7715SBarry Smith its = its*lits; 33877431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 339b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 340b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 341b01c7715SBarry Smith if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 342b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 343b01c7715SBarry Smith 344b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 345b01c7715SBarry Smith 346b01c7715SBarry Smith diag = a->diag; 347b01c7715SBarry Smith idiag = a->idiag; 3481ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3491ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 350b01c7715SBarry Smith 351b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 352b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 353b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 354b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 355b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 356b01c7715SBarry Smith x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 357b01c7715SBarry Smith i2 = 4; 358b01c7715SBarry Smith idiag += 16; 359b01c7715SBarry Smith for (i=1; i<m; i++) { 360b01c7715SBarry Smith v = aa + 16*ai[i]; 361b01c7715SBarry Smith vi = aj + ai[i]; 362b01c7715SBarry Smith nz = diag[i] - ai[i]; 363b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 364b01c7715SBarry Smith while (nz--) { 365b01c7715SBarry Smith idx = 4*(*vi++); 366b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 367b01c7715SBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 368b01c7715SBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 369b01c7715SBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 370b01c7715SBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 371b01c7715SBarry Smith v += 16; 372b01c7715SBarry Smith } 373b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 374b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 375b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 376b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 377b01c7715SBarry Smith idiag += 16; 378b01c7715SBarry Smith i2 += 4; 379b01c7715SBarry Smith } 380b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 381efee365bSSatish Balay ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 382b01c7715SBarry Smith } 383b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 384b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 385b01c7715SBarry Smith i2 = 0; 386b01c7715SBarry Smith mdiag = a->idiag+16*a->mbs; 387b01c7715SBarry Smith for (i=0; i<m; i++) { 388b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 389b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 390b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 391b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 392b01c7715SBarry Smith x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 393b01c7715SBarry Smith mdiag += 16; 394b01c7715SBarry Smith i2 += 4; 395b01c7715SBarry Smith } 396efee365bSSatish Balay ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 397b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 398b01c7715SBarry Smith ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 399b01c7715SBarry Smith } 400b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 401b01c7715SBarry Smith idiag = a->idiag+16*a->mbs - 16; 402b01c7715SBarry Smith i2 = 4*m - 4; 403b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 404b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 405b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 406b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 407b01c7715SBarry Smith x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 408b01c7715SBarry Smith idiag -= 16; 409b01c7715SBarry Smith i2 -= 4; 410b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 411b01c7715SBarry Smith v = aa + 16*(diag[i]+1); 412b01c7715SBarry Smith vi = aj + diag[i] + 1; 413b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 414b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 415b01c7715SBarry Smith while (nz--) { 416b01c7715SBarry Smith idx = 4*(*vi++); 417b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 418b01c7715SBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 419b01c7715SBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 420b01c7715SBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 421b01c7715SBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 422b01c7715SBarry Smith v += 16; 423b01c7715SBarry Smith } 424b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 425b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 426b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 427b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 428b01c7715SBarry Smith idiag -= 16; 429b01c7715SBarry Smith i2 -= 4; 430b01c7715SBarry Smith } 431efee365bSSatish Balay ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 432b01c7715SBarry Smith } 433b01c7715SBarry Smith } else { 434634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 435b01c7715SBarry Smith } 4361ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4371ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 438b01c7715SBarry Smith PetscFunctionReturn(0); 439b01c7715SBarry Smith } 440b01c7715SBarry Smith 441b01c7715SBarry Smith #undef __FUNCT__ 442b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 443c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 444b01c7715SBarry Smith { 445b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 446b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 447b01c7715SBarry Smith const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 448dfbe8321SBarry Smith PetscErrorCode ierr; 449c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 450c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 451b01c7715SBarry Smith 452b01c7715SBarry Smith PetscFunctionBegin; 453b01c7715SBarry Smith its = its*lits; 45477431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 455b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 456b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 457b01c7715SBarry Smith if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 458b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 459b01c7715SBarry Smith 460b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 461b01c7715SBarry Smith 462b01c7715SBarry Smith diag = a->diag; 463b01c7715SBarry Smith idiag = a->idiag; 4641ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4651ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 466b01c7715SBarry Smith 467b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 468b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 469b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 470b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 471b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 472b01c7715SBarry Smith x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 473b01c7715SBarry Smith x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 474b01c7715SBarry Smith i2 = 5; 475b01c7715SBarry Smith idiag += 25; 476b01c7715SBarry Smith for (i=1; i<m; i++) { 477b01c7715SBarry Smith v = aa + 25*ai[i]; 478b01c7715SBarry Smith vi = aj + ai[i]; 479b01c7715SBarry Smith nz = diag[i] - ai[i]; 480b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 481b01c7715SBarry Smith while (nz--) { 482b01c7715SBarry Smith idx = 5*(*vi++); 483b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 484b01c7715SBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 485b01c7715SBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 486b01c7715SBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 487b01c7715SBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 488b01c7715SBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 489b01c7715SBarry Smith v += 25; 490b01c7715SBarry Smith } 491b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 492b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 493b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 494b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 495b01c7715SBarry Smith x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 496b01c7715SBarry Smith idiag += 25; 497b01c7715SBarry Smith i2 += 5; 498b01c7715SBarry Smith } 499b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 500efee365bSSatish Balay ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 501b01c7715SBarry Smith } 502b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 503b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 504b01c7715SBarry Smith i2 = 0; 505b01c7715SBarry Smith mdiag = a->idiag+25*a->mbs; 506b01c7715SBarry Smith for (i=0; i<m; i++) { 507b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 508b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 509b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 510b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 511b01c7715SBarry Smith x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 512b01c7715SBarry Smith x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 513b01c7715SBarry Smith mdiag += 25; 514b01c7715SBarry Smith i2 += 5; 515b01c7715SBarry Smith } 516efee365bSSatish Balay ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 517b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 518b01c7715SBarry Smith ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 519b01c7715SBarry Smith } 520b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 521b01c7715SBarry Smith idiag = a->idiag+25*a->mbs - 25; 522b01c7715SBarry Smith i2 = 5*m - 5; 523b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 524b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 525b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 526b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 527b01c7715SBarry Smith x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 528b01c7715SBarry Smith x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 529b01c7715SBarry Smith idiag -= 25; 530b01c7715SBarry Smith i2 -= 5; 531b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 532b01c7715SBarry Smith v = aa + 25*(diag[i]+1); 533b01c7715SBarry Smith vi = aj + diag[i] + 1; 534b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 535b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 536b01c7715SBarry Smith while (nz--) { 537b01c7715SBarry Smith idx = 5*(*vi++); 538b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 539b01c7715SBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 540b01c7715SBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 541b01c7715SBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 542b01c7715SBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 543b01c7715SBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 544b01c7715SBarry Smith v += 25; 545b01c7715SBarry Smith } 546b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 547b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 548b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 549b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 550b01c7715SBarry Smith x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 551b01c7715SBarry Smith idiag -= 25; 552b01c7715SBarry Smith i2 -= 5; 553b01c7715SBarry Smith } 554efee365bSSatish Balay ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 555b01c7715SBarry Smith } 556b01c7715SBarry Smith } else { 557634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 558b01c7715SBarry Smith } 5591ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5601ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 561b01c7715SBarry Smith PetscFunctionReturn(0); 562b01c7715SBarry Smith } 563b01c7715SBarry Smith 564af674e45SBarry Smith /* 565af674e45SBarry Smith Special version for Fun3d sequential benchmark 566af674e45SBarry Smith */ 567af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 568af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 569af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 570af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 571af674e45SBarry Smith #endif 572af674e45SBarry Smith 5739c8c1041SBarry Smith EXTERN_C_BEGIN 574af674e45SBarry Smith #undef __FUNCT__ 575af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 576c1ac3661SBarry Smith void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 577af674e45SBarry Smith { 578af674e45SBarry Smith Mat A = *AA; 579af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 580c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 581c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 582c1ac3661SBarry Smith PetscInt *aj=a->j,stepval; 583f15d580aSBarry Smith const PetscScalar *value = v; 5844bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 585af674e45SBarry Smith 586af674e45SBarry Smith PetscFunctionBegin; 587af674e45SBarry Smith stepval = (n-1)*4; 588af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 589af674e45SBarry Smith row = im[k]; 590af674e45SBarry Smith rp = aj + ai[row]; 591af674e45SBarry Smith ap = aa + 16*ai[row]; 592af674e45SBarry Smith nrow = ailen[row]; 593af674e45SBarry Smith low = 0; 594af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 595af674e45SBarry Smith col = in[l]; 596af674e45SBarry Smith value = v + k*(stepval+4)*4 + l*4; 597af674e45SBarry Smith low = 0; high = nrow; 598af674e45SBarry Smith while (high-low > 7) { 599af674e45SBarry Smith t = (low+high)/2; 600af674e45SBarry Smith if (rp[t] > col) high = t; 601af674e45SBarry Smith else low = t; 602af674e45SBarry Smith } 603af674e45SBarry Smith for (i=low; i<high; i++) { 604af674e45SBarry Smith if (rp[i] > col) break; 605af674e45SBarry Smith if (rp[i] == col) { 606af674e45SBarry Smith bap = ap + 16*i; 607af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 608af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 609af674e45SBarry Smith bap[jj] += *value++; 610af674e45SBarry Smith } 611af674e45SBarry Smith } 612af674e45SBarry Smith goto noinsert2; 613af674e45SBarry Smith } 614af674e45SBarry Smith } 615af674e45SBarry Smith N = nrow++ - 1; 616af674e45SBarry Smith /* shift up all the later entries in this row */ 617af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 618af674e45SBarry Smith rp[ii+1] = rp[ii]; 619a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 620af674e45SBarry Smith } 621af674e45SBarry Smith if (N >= i) { 622a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 623af674e45SBarry Smith } 624af674e45SBarry Smith rp[i] = col; 625af674e45SBarry Smith bap = ap + 16*i; 626af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 627af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 628af674e45SBarry Smith bap[jj] = *value++; 629af674e45SBarry Smith } 630af674e45SBarry Smith } 631af674e45SBarry Smith noinsert2:; 632af674e45SBarry Smith low = i; 633af674e45SBarry Smith } 634af674e45SBarry Smith ailen[row] = nrow; 635af674e45SBarry Smith } 636af674e45SBarry Smith } 6379c8c1041SBarry Smith EXTERN_C_END 638af674e45SBarry Smith 639af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 640af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 641af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 642af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 643af674e45SBarry Smith #endif 644af674e45SBarry Smith 6459c8c1041SBarry Smith EXTERN_C_BEGIN 646af674e45SBarry Smith #undef __FUNCT__ 647af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 648c1ac3661SBarry Smith void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 649af674e45SBarry Smith { 650af674e45SBarry Smith Mat A = *AA; 651af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 652c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 653c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 654c1ac3661SBarry Smith PetscInt *aj=a->j,brow,bcol; 655c1ac3661SBarry Smith PetscInt ridx,cidx; 656af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 657af674e45SBarry Smith 658af674e45SBarry Smith PetscFunctionBegin; 659af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 660af674e45SBarry Smith row = im[k]; brow = row/4; 661af674e45SBarry Smith rp = aj + ai[brow]; 662af674e45SBarry Smith ap = aa + 16*ai[brow]; 663af674e45SBarry Smith nrow = ailen[brow]; 664af674e45SBarry Smith low = 0; 665af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 666af674e45SBarry Smith col = in[l]; bcol = col/4; 667af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 668af674e45SBarry Smith value = v[l + k*n]; 669af674e45SBarry Smith low = 0; high = nrow; 670af674e45SBarry Smith while (high-low > 7) { 671af674e45SBarry Smith t = (low+high)/2; 672af674e45SBarry Smith if (rp[t] > bcol) high = t; 673af674e45SBarry Smith else low = t; 674af674e45SBarry Smith } 675af674e45SBarry Smith for (i=low; i<high; i++) { 676af674e45SBarry Smith if (rp[i] > bcol) break; 677af674e45SBarry Smith if (rp[i] == bcol) { 678af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 679af674e45SBarry Smith *bap += value; 680af674e45SBarry Smith goto noinsert1; 681af674e45SBarry Smith } 682af674e45SBarry Smith } 683af674e45SBarry Smith N = nrow++ - 1; 684af674e45SBarry Smith /* shift up all the later entries in this row */ 685af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 686af674e45SBarry Smith rp[ii+1] = rp[ii]; 687a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 688af674e45SBarry Smith } 689af674e45SBarry Smith if (N>=i) { 690a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 691af674e45SBarry Smith } 692af674e45SBarry Smith rp[i] = bcol; 693af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 694af674e45SBarry Smith noinsert1:; 695af674e45SBarry Smith low = i; 696af674e45SBarry Smith } 697af674e45SBarry Smith ailen[brow] = nrow; 698af674e45SBarry Smith } 699af674e45SBarry Smith } 7009c8c1041SBarry Smith EXTERN_C_END 701af674e45SBarry Smith 70295d5f7c2SBarry Smith /* UGLY, ugly, ugly 70387828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 7043477af42SKris Buschelman not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 70595d5f7c2SBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 70695d5f7c2SBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 70795d5f7c2SBarry Smith into the single precision data structures. 70895d5f7c2SBarry Smith */ 70995d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 710690b6cddSBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 71195d5f7c2SBarry Smith #else 71295d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 71395d5f7c2SBarry Smith #endif 71495d5f7c2SBarry Smith 7152d61bbb3SSatish Balay #define CHUNKSIZE 10 716de6a44a3SBarry Smith 717be5855fcSBarry Smith /* 718be5855fcSBarry Smith Checks for missing diagonals 719be5855fcSBarry Smith */ 7204a2ae208SSatish Balay #undef __FUNCT__ 7214a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 722dfbe8321SBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 723be5855fcSBarry Smith { 724be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7256849ba73SBarry Smith PetscErrorCode ierr; 726c1ac3661SBarry Smith PetscInt *diag,*jj = a->j,i; 727be5855fcSBarry Smith 728be5855fcSBarry Smith PetscFunctionBegin; 729c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 730883fce79SBarry Smith diag = a->diag; 7310e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 732be5855fcSBarry Smith if (jj[diag[i]] != i) { 73377431f27SBarry Smith SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 734be5855fcSBarry Smith } 735be5855fcSBarry Smith } 736be5855fcSBarry Smith PetscFunctionReturn(0); 737be5855fcSBarry Smith } 738be5855fcSBarry Smith 7394a2ae208SSatish Balay #undef __FUNCT__ 7404a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 741dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 742de6a44a3SBarry Smith { 743de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7446849ba73SBarry Smith PetscErrorCode ierr; 745c1ac3661SBarry Smith PetscInt i,j,*diag,m = a->mbs; 746de6a44a3SBarry Smith 7473a40ed3dSBarry Smith PetscFunctionBegin; 748883fce79SBarry Smith if (a->diag) PetscFunctionReturn(0); 749883fce79SBarry Smith 750c1ac3661SBarry Smith ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 75152e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 7527fc0212eSBarry Smith for (i=0; i<m; i++) { 753ecc615b2SBarry Smith diag[i] = a->i[i+1]; 754de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 755de6a44a3SBarry Smith if (a->j[j] == i) { 756de6a44a3SBarry Smith diag[i] = j; 757de6a44a3SBarry Smith break; 758de6a44a3SBarry Smith } 759de6a44a3SBarry Smith } 760de6a44a3SBarry Smith } 761de6a44a3SBarry Smith a->diag = diag; 7623a40ed3dSBarry Smith PetscFunctionReturn(0); 763de6a44a3SBarry Smith } 7642593348eSBarry Smith 7652593348eSBarry Smith 766690b6cddSBarry Smith EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 7673b2fbd54SBarry Smith 7684a2ae208SSatish Balay #undef __FUNCT__ 7694a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 770c1ac3661SBarry Smith static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 7713b2fbd54SBarry Smith { 7723b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 773dfbe8321SBarry Smith PetscErrorCode ierr; 774c1ac3661SBarry Smith PetscInt n = a->mbs,i; 7753b2fbd54SBarry Smith 7763a40ed3dSBarry Smith PetscFunctionBegin; 7773b2fbd54SBarry Smith *nn = n; 7783a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 7793b2fbd54SBarry Smith if (symmetric) { 7803b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 7813b2fbd54SBarry Smith } else if (oshift == 1) { 7823b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 783c1ac3661SBarry Smith PetscInt nz = a->i[n]; 7843b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]++; 7853b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]++; 7863b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 7873b2fbd54SBarry Smith } else { 7883b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 7893b2fbd54SBarry Smith } 7903b2fbd54SBarry Smith 7913a40ed3dSBarry Smith PetscFunctionReturn(0); 7923b2fbd54SBarry Smith } 7933b2fbd54SBarry Smith 7944a2ae208SSatish Balay #undef __FUNCT__ 7954a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 796c1ac3661SBarry Smith static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 7973b2fbd54SBarry Smith { 7983b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7996849ba73SBarry Smith PetscErrorCode ierr; 800c1ac3661SBarry Smith PetscInt i,n = a->mbs; 8013b2fbd54SBarry Smith 8023a40ed3dSBarry Smith PetscFunctionBegin; 8033a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 8043b2fbd54SBarry Smith if (symmetric) { 805606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 806606d414cSSatish Balay ierr = PetscFree(*ja);CHKERRQ(ierr); 807af5da2bfSBarry Smith } else if (oshift == 1) { 808c1ac3661SBarry Smith PetscInt nz = a->i[n]-1; 8093b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]--; 8103b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]--; 8113b2fbd54SBarry Smith } 8123a40ed3dSBarry Smith PetscFunctionReturn(0); 8133b2fbd54SBarry Smith } 8143b2fbd54SBarry Smith 8154a2ae208SSatish Balay #undef __FUNCT__ 8164a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 817dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 8182d61bbb3SSatish Balay { 8192d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 820dfbe8321SBarry Smith PetscErrorCode ierr; 8212d61bbb3SSatish Balay 822433994e6SBarry Smith PetscFunctionBegin; 823aa482453SBarry Smith #if defined(PETSC_USE_LOG) 82477431f27SBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 8252d61bbb3SSatish Balay #endif 826606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 827606d414cSSatish Balay if (!a->singlemalloc) { 828606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 829606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 830606d414cSSatish Balay } 831c38d4ed2SBarry Smith if (a->row) { 832c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 833c38d4ed2SBarry Smith } 834c38d4ed2SBarry Smith if (a->col) { 835c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 836c38d4ed2SBarry Smith } 837606d414cSSatish Balay if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 838a1d92eedSBarry Smith if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 839606d414cSSatish Balay if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 840606d414cSSatish Balay if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 841606d414cSSatish Balay if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 842606d414cSSatish Balay if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 843e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 844606d414cSSatish Balay if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 845563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 846563b5814SBarry Smith if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 847563b5814SBarry Smith #endif 848c4319e64SHong Zhang if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 84973e7a558SHong Zhang if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 850c4319e64SHong Zhang 851606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 852901853e0SKris Buschelman 853*43516a2dSKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 854901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 855901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 856901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 857901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 858901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 859901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 8602d61bbb3SSatish Balay PetscFunctionReturn(0); 8612d61bbb3SSatish Balay } 8622d61bbb3SSatish Balay 8634a2ae208SSatish Balay #undef __FUNCT__ 8644a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 865dfbe8321SBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 8662d61bbb3SSatish Balay { 8672d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 86863ba0a88SBarry Smith PetscErrorCode ierr; 8692d61bbb3SSatish Balay 8702d61bbb3SSatish Balay PetscFunctionBegin; 871aa275fccSKris Buschelman switch (op) { 872aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 873aa275fccSKris Buschelman a->roworiented = PETSC_TRUE; 874aa275fccSKris Buschelman break; 875aa275fccSKris Buschelman case MAT_COLUMN_ORIENTED: 876aa275fccSKris Buschelman a->roworiented = PETSC_FALSE; 877aa275fccSKris Buschelman break; 878aa275fccSKris Buschelman case MAT_COLUMNS_SORTED: 879aa275fccSKris Buschelman a->sorted = PETSC_TRUE; 880aa275fccSKris Buschelman break; 881aa275fccSKris Buschelman case MAT_COLUMNS_UNSORTED: 882aa275fccSKris Buschelman a->sorted = PETSC_FALSE; 883aa275fccSKris Buschelman break; 884aa275fccSKris Buschelman case MAT_KEEP_ZEROED_ROWS: 885aa275fccSKris Buschelman a->keepzeroedrows = PETSC_TRUE; 886aa275fccSKris Buschelman break; 887aa275fccSKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 888aa275fccSKris Buschelman a->nonew = 1; 889aa275fccSKris Buschelman break; 890aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 891aa275fccSKris Buschelman a->nonew = -1; 892aa275fccSKris Buschelman break; 893aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 894aa275fccSKris Buschelman a->nonew = -2; 895aa275fccSKris Buschelman break; 896aa275fccSKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 897aa275fccSKris Buschelman a->nonew = 0; 898aa275fccSKris Buschelman break; 899aa275fccSKris Buschelman case MAT_ROWS_SORTED: 900aa275fccSKris Buschelman case MAT_ROWS_UNSORTED: 901aa275fccSKris Buschelman case MAT_YES_NEW_DIAGONALS: 902aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 903aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 90463ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 905aa275fccSKris Buschelman break; 906aa275fccSKris Buschelman case MAT_NO_NEW_DIAGONALS: 90729bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 90877e54ba9SKris Buschelman case MAT_SYMMETRIC: 90977e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 9109a4540c5SBarry Smith case MAT_NOT_SYMMETRIC: 9119a4540c5SBarry Smith case MAT_NOT_STRUCTURALLY_SYMMETRIC: 9129a4540c5SBarry Smith case MAT_HERMITIAN: 9139a4540c5SBarry Smith case MAT_NOT_HERMITIAN: 9149a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 9159a4540c5SBarry Smith case MAT_NOT_SYMMETRY_ETERNAL: 91677e54ba9SKris Buschelman break; 917aa275fccSKris Buschelman default: 91829bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 9192d61bbb3SSatish Balay } 9202d61bbb3SSatish Balay PetscFunctionReturn(0); 9212d61bbb3SSatish Balay } 9222d61bbb3SSatish Balay 9234a2ae208SSatish Balay #undef __FUNCT__ 9244a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 925c1ac3661SBarry Smith PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 9262d61bbb3SSatish Balay { 9272d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 9286849ba73SBarry Smith PetscErrorCode ierr; 929c1ac3661SBarry Smith PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 9303f1db9ecSBarry Smith MatScalar *aa,*aa_i; 93187828ca2SBarry Smith PetscScalar *v_i; 9322d61bbb3SSatish Balay 9332d61bbb3SSatish Balay PetscFunctionBegin; 934521d7252SBarry Smith bs = A->bs; 9352d61bbb3SSatish Balay ai = a->i; 9362d61bbb3SSatish Balay aj = a->j; 9372d61bbb3SSatish Balay aa = a->a; 9382d61bbb3SSatish Balay bs2 = a->bs2; 9392d61bbb3SSatish Balay 94077431f27SBarry Smith if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 9412d61bbb3SSatish Balay 9422d61bbb3SSatish Balay bn = row/bs; /* Block number */ 9432d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 9442d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 9452d61bbb3SSatish Balay *nz = bs*M; 9462d61bbb3SSatish Balay 9472d61bbb3SSatish Balay if (v) { 9482d61bbb3SSatish Balay *v = 0; 9492d61bbb3SSatish Balay if (*nz) { 95087828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 9512d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 9522d61bbb3SSatish Balay v_i = *v + i*bs; 9532d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 9542d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 9552d61bbb3SSatish Balay } 9562d61bbb3SSatish Balay } 9572d61bbb3SSatish Balay } 9582d61bbb3SSatish Balay 9592d61bbb3SSatish Balay if (idx) { 9602d61bbb3SSatish Balay *idx = 0; 9612d61bbb3SSatish Balay if (*nz) { 962c1ac3661SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 9632d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 9642d61bbb3SSatish Balay idx_i = *idx + i*bs; 9652d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 9662d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 9672d61bbb3SSatish Balay } 9682d61bbb3SSatish Balay } 9692d61bbb3SSatish Balay } 9702d61bbb3SSatish Balay PetscFunctionReturn(0); 9712d61bbb3SSatish Balay } 9722d61bbb3SSatish Balay 9734a2ae208SSatish Balay #undef __FUNCT__ 9744a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 975c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 9762d61bbb3SSatish Balay { 977dfbe8321SBarry Smith PetscErrorCode ierr; 978606d414cSSatish Balay 9792d61bbb3SSatish Balay PetscFunctionBegin; 980606d414cSSatish Balay if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 981606d414cSSatish Balay if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 9822d61bbb3SSatish Balay PetscFunctionReturn(0); 9832d61bbb3SSatish Balay } 9842d61bbb3SSatish Balay 9854a2ae208SSatish Balay #undef __FUNCT__ 9864a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 987dfbe8321SBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 9882d61bbb3SSatish Balay { 9892d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 9902d61bbb3SSatish Balay Mat C; 9916849ba73SBarry Smith PetscErrorCode ierr; 992521d7252SBarry Smith PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 993c1ac3661SBarry Smith PetscInt *rows,*cols,bs2=a->bs2; 99487828ca2SBarry Smith PetscScalar *array; 9952d61bbb3SSatish Balay 9962d61bbb3SSatish Balay PetscFunctionBegin; 99729bbc08cSBarry Smith if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 998c1ac3661SBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 999c1ac3661SBarry Smith ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 10002d61bbb3SSatish Balay 1001375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 100287828ca2SBarry Smith ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 100387828ca2SBarry Smith for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1004375fe846SBarry Smith #else 10053eda8832SBarry Smith array = a->a; 1006375fe846SBarry Smith #endif 1007375fe846SBarry Smith 10082d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1009f204ca49SKris Buschelman ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 1010f204ca49SKris Buschelman ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1011f204ca49SKris Buschelman ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1012606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 1013c1ac3661SBarry Smith ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 10142d61bbb3SSatish Balay cols = rows + bs; 10152d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 10162d61bbb3SSatish Balay cols[0] = i*bs; 10172d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 10182d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 10192d61bbb3SSatish Balay for (j=0; j<len; j++) { 10202d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 10212d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 10222d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 10232d61bbb3SSatish Balay array += bs2; 10242d61bbb3SSatish Balay } 10252d61bbb3SSatish Balay } 1026606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 1027375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 1028375fe846SBarry Smith ierr = PetscFree(array); 1029375fe846SBarry Smith #endif 10302d61bbb3SSatish Balay 10312d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10322d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10332d61bbb3SSatish Balay 1034c4992f7dSBarry Smith if (B) { 10352d61bbb3SSatish Balay *B = C; 10362d61bbb3SSatish Balay } else { 1037273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 10382d61bbb3SSatish Balay } 10392d61bbb3SSatish Balay PetscFunctionReturn(0); 10402d61bbb3SSatish Balay } 10412d61bbb3SSatish Balay 10424a2ae208SSatish Balay #undef __FUNCT__ 10434a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 10446849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 10452593348eSBarry Smith { 1046b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 10476849ba73SBarry Smith PetscErrorCode ierr; 1048521d7252SBarry Smith PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1049b24ad042SBarry Smith int fd; 105087828ca2SBarry Smith PetscScalar *aa; 1051ce6f0cecSBarry Smith FILE *file; 10522593348eSBarry Smith 10533a40ed3dSBarry Smith PetscFunctionBegin; 1054b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1055c1ac3661SBarry Smith ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1056552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 10573b2fbd54SBarry Smith 1058273d9f13SBarry Smith col_lens[1] = A->m; 1059273d9f13SBarry Smith col_lens[2] = A->n; 10607e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 10612593348eSBarry Smith 10622593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 1063b6490206SBarry Smith count = 0; 1064b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1065b6490206SBarry Smith for (j=0; j<bs; j++) { 1066b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1067b6490206SBarry Smith } 10682593348eSBarry Smith } 10696f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1070606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 10712593348eSBarry Smith 10722593348eSBarry Smith /* store column indices (zero start index) */ 1073c1ac3661SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1074b6490206SBarry Smith count = 0; 1075b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1076b6490206SBarry Smith for (j=0; j<bs; j++) { 1077b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1078b6490206SBarry Smith for (l=0; l<bs; l++) { 1079b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 10802593348eSBarry Smith } 10812593348eSBarry Smith } 1082b6490206SBarry Smith } 1083b6490206SBarry Smith } 10846f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1085606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 10862593348eSBarry Smith 10872593348eSBarry Smith /* store nonzero values */ 108887828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1089b6490206SBarry Smith count = 0; 1090b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1091b6490206SBarry Smith for (j=0; j<bs; j++) { 1092b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1093b6490206SBarry Smith for (l=0; l<bs; l++) { 10947e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 1095b6490206SBarry Smith } 1096b6490206SBarry Smith } 1097b6490206SBarry Smith } 1098b6490206SBarry Smith } 10996f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1100606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1101ce6f0cecSBarry Smith 1102b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1103ce6f0cecSBarry Smith if (file) { 1104521d7252SBarry Smith fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1105ce6f0cecSBarry Smith } 11063a40ed3dSBarry Smith PetscFunctionReturn(0); 11072593348eSBarry Smith } 11082593348eSBarry Smith 11094a2ae208SSatish Balay #undef __FUNCT__ 11104a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 11116849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 11122593348eSBarry Smith { 1113b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1114dfbe8321SBarry Smith PetscErrorCode ierr; 1115521d7252SBarry Smith PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1116f3ef73ceSBarry Smith PetscViewerFormat format; 11172593348eSBarry Smith 11183a40ed3dSBarry Smith PetscFunctionBegin; 1119b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1120456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 112177431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1122fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1123bcd9e38bSBarry Smith Mat aij; 1124ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1125bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 1126bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 112704929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 112804929863SHong Zhang PetscFunctionReturn(0); 1129fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1130b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 113144cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 113244cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 113377431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 113444cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 113544cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 1136aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 11370e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 113877431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 11390e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 11400e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 114177431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 11420e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 11430e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 114477431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 11450ef38995SBarry Smith } 114644cd7ae7SLois Curfman McInnes #else 11470ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 114877431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 11490ef38995SBarry Smith } 115044cd7ae7SLois Curfman McInnes #endif 115144cd7ae7SLois Curfman McInnes } 115244cd7ae7SLois Curfman McInnes } 1153b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 115444cd7ae7SLois Curfman McInnes } 115544cd7ae7SLois Curfman McInnes } 1156b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 11570ef38995SBarry Smith } else { 1158b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1159b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1160b6490206SBarry Smith for (j=0; j<bs; j++) { 116177431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1162b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1163b6490206SBarry Smith for (l=0; l<bs; l++) { 1164aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 11650e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 116677431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 11670e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 11680e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 116977431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 11700e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 11710ef38995SBarry Smith } else { 117277431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 117388685aaeSLois Curfman McInnes } 117488685aaeSLois Curfman McInnes #else 117577431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 117688685aaeSLois Curfman McInnes #endif 11772593348eSBarry Smith } 11782593348eSBarry Smith } 1179b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 11802593348eSBarry Smith } 11812593348eSBarry Smith } 1182b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1183b6490206SBarry Smith } 1184b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 11853a40ed3dSBarry Smith PetscFunctionReturn(0); 11862593348eSBarry Smith } 11872593348eSBarry Smith 11884a2ae208SSatish Balay #undef __FUNCT__ 11894a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 11906849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 11913270192aSSatish Balay { 119277ed5343SBarry Smith Mat A = (Mat) Aa; 11933270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 11946849ba73SBarry Smith PetscErrorCode ierr; 1195521d7252SBarry Smith PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 11960e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 11973f1db9ecSBarry Smith MatScalar *aa; 1198b0a32e0cSBarry Smith PetscViewer viewer; 11993270192aSSatish Balay 12003a40ed3dSBarry Smith PetscFunctionBegin; 12013270192aSSatish Balay 1202b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 120377ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 120477ed5343SBarry Smith 1205b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 120677ed5343SBarry Smith 12073270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1208b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 12093270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 12103270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1211273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 12123270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 12133270192aSSatish Balay aa = a->a + j*bs2; 12143270192aSSatish Balay for (k=0; k<bs; k++) { 12153270192aSSatish Balay for (l=0; l<bs; l++) { 12160e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 1217b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 12183270192aSSatish Balay } 12193270192aSSatish Balay } 12203270192aSSatish Balay } 12213270192aSSatish Balay } 1222b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 12233270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 12243270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1225273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 12263270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 12273270192aSSatish Balay aa = a->a + j*bs2; 12283270192aSSatish Balay for (k=0; k<bs; k++) { 12293270192aSSatish Balay for (l=0; l<bs; l++) { 12300e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 1231b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 12323270192aSSatish Balay } 12333270192aSSatish Balay } 12343270192aSSatish Balay } 12353270192aSSatish Balay } 12363270192aSSatish Balay 1237b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 12383270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 12393270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1240273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 12413270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 12423270192aSSatish Balay aa = a->a + j*bs2; 12433270192aSSatish Balay for (k=0; k<bs; k++) { 12443270192aSSatish Balay for (l=0; l<bs; l++) { 12450e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 1246b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 12473270192aSSatish Balay } 12483270192aSSatish Balay } 12493270192aSSatish Balay } 12503270192aSSatish Balay } 125177ed5343SBarry Smith PetscFunctionReturn(0); 125277ed5343SBarry Smith } 12533270192aSSatish Balay 12544a2ae208SSatish Balay #undef __FUNCT__ 12554a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 12566849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 125777ed5343SBarry Smith { 1258dfbe8321SBarry Smith PetscErrorCode ierr; 12590e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 1260b0a32e0cSBarry Smith PetscDraw draw; 126177ed5343SBarry Smith PetscTruth isnull; 12623270192aSSatish Balay 126377ed5343SBarry Smith PetscFunctionBegin; 126477ed5343SBarry Smith 1265b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1266b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 126777ed5343SBarry Smith 126877ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1269273d9f13SBarry Smith xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 127077ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 1271b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1272b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 127377ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 12743a40ed3dSBarry Smith PetscFunctionReturn(0); 12753270192aSSatish Balay } 12763270192aSSatish Balay 12774a2ae208SSatish Balay #undef __FUNCT__ 12784a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 1279dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 12802593348eSBarry Smith { 1281dfbe8321SBarry Smith PetscErrorCode ierr; 128232077d6dSBarry Smith PetscTruth iascii,isbinary,isdraw; 12832593348eSBarry Smith 12843a40ed3dSBarry Smith PetscFunctionBegin; 128532077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1286fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1287fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 128832077d6dSBarry Smith if (iascii){ 12893a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 12900f5bd95cSBarry Smith } else if (isbinary) { 12913a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 12920f5bd95cSBarry Smith } else if (isdraw) { 12933a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 12945cd90555SBarry Smith } else { 1295a5e6ed63SBarry Smith Mat B; 1296ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1297a5e6ed63SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1298a5e6ed63SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 12992593348eSBarry Smith } 13003a40ed3dSBarry Smith PetscFunctionReturn(0); 13012593348eSBarry Smith } 1302b6490206SBarry Smith 1303cd0e1443SSatish Balay 13044a2ae208SSatish Balay #undef __FUNCT__ 13054a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 1306c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1307cd0e1443SSatish Balay { 1308cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1309c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1310c1ac3661SBarry Smith PetscInt *ai = a->i,*ailen = a->ilen; 1311521d7252SBarry Smith PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 13123f1db9ecSBarry Smith MatScalar *ap,*aa = a->a,zero = 0.0; 1313cd0e1443SSatish Balay 13143a40ed3dSBarry Smith PetscFunctionBegin; 13152d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 1316cd0e1443SSatish Balay row = im[k]; brow = row/bs; 131729bbc08cSBarry Smith if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 131877431f27SBarry Smith if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 13192d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 13202c3acbe9SBarry Smith nrow = ailen[brow]; 13212d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 132229bbc08cSBarry Smith if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 132377431f27SBarry Smith if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 13242d61bbb3SSatish Balay col = in[l] ; 13252d61bbb3SSatish Balay bcol = col/bs; 13262d61bbb3SSatish Balay cidx = col%bs; 13272d61bbb3SSatish Balay ridx = row%bs; 13282d61bbb3SSatish Balay high = nrow; 13292d61bbb3SSatish Balay low = 0; /* assume unsorted */ 13302d61bbb3SSatish Balay while (high-low > 5) { 1331cd0e1443SSatish Balay t = (low+high)/2; 1332cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 1333cd0e1443SSatish Balay else low = t; 1334cd0e1443SSatish Balay } 1335cd0e1443SSatish Balay for (i=low; i<high; i++) { 1336cd0e1443SSatish Balay if (rp[i] > bcol) break; 1337cd0e1443SSatish Balay if (rp[i] == bcol) { 13382d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 13392d61bbb3SSatish Balay goto finished; 1340cd0e1443SSatish Balay } 1341cd0e1443SSatish Balay } 13422d61bbb3SSatish Balay *v++ = zero; 13432d61bbb3SSatish Balay finished:; 1344cd0e1443SSatish Balay } 1345cd0e1443SSatish Balay } 13463a40ed3dSBarry Smith PetscFunctionReturn(0); 1347cd0e1443SSatish Balay } 1348cd0e1443SSatish Balay 134995d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 13504a2ae208SSatish Balay #undef __FUNCT__ 13514a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1352c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 135395d5f7c2SBarry Smith { 1354563b5814SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1355dfbe8321SBarry Smith PetscErrorCode ierr; 1356c1ac3661SBarry Smith PetscInt i,N = m*n*b->bs2; 1357563b5814SBarry Smith MatScalar *vsingle; 135895d5f7c2SBarry Smith 135995d5f7c2SBarry Smith PetscFunctionBegin; 1360563b5814SBarry Smith if (N > b->setvalueslen) { 1361563b5814SBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1362b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1363563b5814SBarry Smith b->setvalueslen = N; 1364563b5814SBarry Smith } 1365563b5814SBarry Smith vsingle = b->setvaluescopy; 136695d5f7c2SBarry Smith for (i=0; i<N; i++) { 136795d5f7c2SBarry Smith vsingle[i] = v[i]; 136895d5f7c2SBarry Smith } 136995d5f7c2SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 137095d5f7c2SBarry Smith PetscFunctionReturn(0); 137195d5f7c2SBarry Smith } 137295d5f7c2SBarry Smith #endif 137395d5f7c2SBarry Smith 13742d61bbb3SSatish Balay 13754a2ae208SSatish Balay #undef __FUNCT__ 13764a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1377c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 137892c4ed94SBarry Smith { 137992c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1380e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1381c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 13826849ba73SBarry Smith PetscErrorCode ierr; 1383521d7252SBarry Smith PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1384273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 1385f15d580aSBarry Smith const MatScalar *value = v; 1386f15d580aSBarry Smith MatScalar *ap,*aa = a->a,*bap; 138792c4ed94SBarry Smith 13883a40ed3dSBarry Smith PetscFunctionBegin; 13890e324ae4SSatish Balay if (roworiented) { 13900e324ae4SSatish Balay stepval = (n-1)*bs; 13910e324ae4SSatish Balay } else { 13920e324ae4SSatish Balay stepval = (m-1)*bs; 13930e324ae4SSatish Balay } 139492c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 139592c4ed94SBarry Smith row = im[k]; 13965ef9f2a5SBarry Smith if (row < 0) continue; 13972515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 139877431f27SBarry Smith if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 139992c4ed94SBarry Smith #endif 140092c4ed94SBarry Smith rp = aj + ai[row]; 140192c4ed94SBarry Smith ap = aa + bs2*ai[row]; 140292c4ed94SBarry Smith rmax = imax[row]; 140392c4ed94SBarry Smith nrow = ailen[row]; 140492c4ed94SBarry Smith low = 0; 1405c71e6ed7SBarry Smith high = nrow; 140692c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 14075ef9f2a5SBarry Smith if (in[l] < 0) continue; 14082515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 140977431f27SBarry Smith if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 141092c4ed94SBarry Smith #endif 141192c4ed94SBarry Smith col = in[l]; 141292c4ed94SBarry Smith if (roworiented) { 14130e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 14140e324ae4SSatish Balay } else { 14150e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 141692c4ed94SBarry Smith } 1417c71e6ed7SBarry Smith if (col < lastcol) low = 0; else high = nrow; 1418e2ee6c50SBarry Smith lastcol = col; 141992c4ed94SBarry Smith while (high-low > 7) { 142092c4ed94SBarry Smith t = (low+high)/2; 142192c4ed94SBarry Smith if (rp[t] > col) high = t; 142292c4ed94SBarry Smith else low = t; 142392c4ed94SBarry Smith } 142492c4ed94SBarry Smith for (i=low; i<high; i++) { 142592c4ed94SBarry Smith if (rp[i] > col) break; 142692c4ed94SBarry Smith if (rp[i] == col) { 14278a84c255SSatish Balay bap = ap + bs2*i; 14280e324ae4SSatish Balay if (roworiented) { 14298a84c255SSatish Balay if (is == ADD_VALUES) { 1430dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1431dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 14328a84c255SSatish Balay bap[jj] += *value++; 1433dd9472c6SBarry Smith } 1434dd9472c6SBarry Smith } 14350e324ae4SSatish Balay } else { 1436dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1437dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 14380e324ae4SSatish Balay bap[jj] = *value++; 14398a84c255SSatish Balay } 1440dd9472c6SBarry Smith } 1441dd9472c6SBarry Smith } 14420e324ae4SSatish Balay } else { 14430e324ae4SSatish Balay if (is == ADD_VALUES) { 1444dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1445dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 14460e324ae4SSatish Balay *bap++ += *value++; 1447dd9472c6SBarry Smith } 1448dd9472c6SBarry Smith } 14490e324ae4SSatish Balay } else { 1450dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1451dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 14520e324ae4SSatish Balay *bap++ = *value++; 14530e324ae4SSatish Balay } 14548a84c255SSatish Balay } 1455dd9472c6SBarry Smith } 1456dd9472c6SBarry Smith } 1457f1241b54SBarry Smith goto noinsert2; 145892c4ed94SBarry Smith } 145992c4ed94SBarry Smith } 146089280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 146177431f27SBarry Smith else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 146292c4ed94SBarry Smith if (nrow >= rmax) { 146392c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 1464c1ac3661SBarry Smith PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 14653f1db9ecSBarry Smith MatScalar *new_a; 146692c4ed94SBarry Smith 146777431f27SBarry Smith if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 146889280ab3SLois Curfman McInnes 146992c4ed94SBarry Smith /* malloc new storage space */ 1470c1ac3661SBarry Smith len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1471b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1472690b6cddSBarry Smith new_j = (PetscInt*)(new_a + bs2*new_nz); 147392c4ed94SBarry Smith new_i = new_j + new_nz; 147492c4ed94SBarry Smith 147592c4ed94SBarry Smith /* copy over old data into new slots */ 147692c4ed94SBarry Smith for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 147792c4ed94SBarry Smith for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1478c1ac3661SBarry Smith ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 147992c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1480c1ac3661SBarry Smith ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1481549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1482549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1483549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 148492c4ed94SBarry Smith /* free up old matrix storage */ 1485606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 1486606d414cSSatish Balay if (!a->singlemalloc) { 1487606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 1488606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 1489606d414cSSatish Balay } 149092c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1491c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 149292c4ed94SBarry Smith 149392c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 149492c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 149552e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 149692c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 149792c4ed94SBarry Smith a->reallocs++; 149892c4ed94SBarry Smith a->nz++; 149992c4ed94SBarry Smith } 150092c4ed94SBarry Smith N = nrow++ - 1; 150192c4ed94SBarry Smith /* shift up all the later entries in this row */ 150292c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 150392c4ed94SBarry Smith rp[ii+1] = rp[ii]; 1504549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 150592c4ed94SBarry Smith } 1506549d3d68SSatish Balay if (N >= i) { 1507549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1508549d3d68SSatish Balay } 150992c4ed94SBarry Smith rp[i] = col; 15108a84c255SSatish Balay bap = ap + bs2*i; 15110e324ae4SSatish Balay if (roworiented) { 1512dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1513dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 15140e324ae4SSatish Balay bap[jj] = *value++; 1515dd9472c6SBarry Smith } 1516dd9472c6SBarry Smith } 15170e324ae4SSatish Balay } else { 1518dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1519dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 15200e324ae4SSatish Balay *bap++ = *value++; 15210e324ae4SSatish Balay } 1522dd9472c6SBarry Smith } 1523dd9472c6SBarry Smith } 1524f1241b54SBarry Smith noinsert2:; 152592c4ed94SBarry Smith low = i; 152692c4ed94SBarry Smith } 152792c4ed94SBarry Smith ailen[row] = nrow; 152892c4ed94SBarry Smith } 15293a40ed3dSBarry Smith PetscFunctionReturn(0); 153092c4ed94SBarry Smith } 153126e093fcSHong Zhang 15324a2ae208SSatish Balay #undef __FUNCT__ 15334a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1534dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1535584200bdSSatish Balay { 1536584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1537c1ac3661SBarry Smith PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1538c1ac3661SBarry Smith PetscInt m = A->m,*ip,N,*ailen = a->ilen; 15396849ba73SBarry Smith PetscErrorCode ierr; 1540c1ac3661SBarry Smith PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 15413f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 15423447b6efSHong Zhang PetscReal ratio=0.6; 1543584200bdSSatish Balay 15443a40ed3dSBarry Smith PetscFunctionBegin; 15453a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1546584200bdSSatish Balay 154743ee02c3SBarry Smith if (m) rmax = ailen[0]; 1548584200bdSSatish Balay for (i=1; i<mbs; i++) { 1549584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1550584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1551d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1552584200bdSSatish Balay if (fshift) { 1553a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1554584200bdSSatish Balay N = ailen[i]; 1555584200bdSSatish Balay for (j=0; j<N; j++) { 1556584200bdSSatish Balay ip[j-fshift] = ip[j]; 1557549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1558584200bdSSatish Balay } 1559584200bdSSatish Balay } 1560584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1561584200bdSSatish Balay } 1562584200bdSSatish Balay if (mbs) { 1563584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1564584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1565584200bdSSatish Balay } 1566584200bdSSatish Balay /* reset ilen and imax for each row */ 1567584200bdSSatish Balay for (i=0; i<mbs; i++) { 1568584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1569584200bdSSatish Balay } 1570a7c10996SSatish Balay a->nz = ai[mbs]; 1571584200bdSSatish Balay 1572584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1573b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 1574584200bdSSatish Balay if (fshift && a->diag) { 1575606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 157652e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1577584200bdSSatish Balay a->diag = 0; 1578584200bdSSatish Balay } 157963ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 158063ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 158163ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1582e2f3b5e9SSatish Balay a->reallocs = 0; 15830e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1584cf4441caSHong Zhang 1585cb5d8e9eSHong Zhang /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 15862f53aa61SHong Zhang if (a->compressedrow.use){ 1587317fbc4cSHong Zhang ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 158873e7a558SHong Zhang } 158988e51ccdSHong Zhang 159088e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 15913a40ed3dSBarry Smith PetscFunctionReturn(0); 1592584200bdSSatish Balay } 1593584200bdSSatish Balay 1594bea157c4SSatish Balay /* 1595bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1596bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1597bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1598bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1599bea157c4SSatish Balay */ 16004a2ae208SSatish Balay #undef __FUNCT__ 16014a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1602c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1603d9b7c43dSSatish Balay { 1604c1ac3661SBarry Smith PetscInt i,j,k,row; 1605bea157c4SSatish Balay PetscTruth flg; 16063a40ed3dSBarry Smith 1607433994e6SBarry Smith PetscFunctionBegin; 1608bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1609bea157c4SSatish Balay row = idx[i]; 1610bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1611bea157c4SSatish Balay sizes[j] = 1; 1612bea157c4SSatish Balay i++; 1613e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1614bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1615bea157c4SSatish Balay i++; 1616bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1617bea157c4SSatish Balay flg = PETSC_TRUE; 1618bea157c4SSatish Balay for (k=1; k<bs; k++) { 1619bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1620bea157c4SSatish Balay flg = PETSC_FALSE; 1621bea157c4SSatish Balay break; 1622d9b7c43dSSatish Balay } 1623bea157c4SSatish Balay } 1624abc0a331SBarry Smith if (flg) { /* No break in the bs */ 1625bea157c4SSatish Balay sizes[j] = bs; 1626bea157c4SSatish Balay i+= bs; 1627bea157c4SSatish Balay } else { 1628bea157c4SSatish Balay sizes[j] = 1; 1629bea157c4SSatish Balay i++; 1630bea157c4SSatish Balay } 1631bea157c4SSatish Balay } 1632bea157c4SSatish Balay } 1633bea157c4SSatish Balay *bs_max = j; 16343a40ed3dSBarry Smith PetscFunctionReturn(0); 1635d9b7c43dSSatish Balay } 1636d9b7c43dSSatish Balay 16374a2ae208SSatish Balay #undef __FUNCT__ 16384a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1639dfbe8321SBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1640d9b7c43dSSatish Balay { 1641d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1642dfbe8321SBarry Smith PetscErrorCode ierr; 1643c1ac3661SBarry Smith PetscInt i,j,k,count,is_n,*is_idx,*rows; 1644521d7252SBarry Smith PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 164587828ca2SBarry Smith PetscScalar zero = 0.0; 16463f1db9ecSBarry Smith MatScalar *aa; 1647d9b7c43dSSatish Balay 16483a40ed3dSBarry Smith PetscFunctionBegin; 1649d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1650b9b97703SBarry Smith ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1651d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1652d9b7c43dSSatish Balay 1653bea157c4SSatish Balay /* allocate memory for rows,sizes */ 1654c1ac3661SBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1655bea157c4SSatish Balay sizes = rows + is_n; 1656bea157c4SSatish Balay 1657563b5814SBarry Smith /* copy IS values to rows, and sort them */ 1658bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1659bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1660dffd3267SBarry Smith if (baij->keepzeroedrows) { 1661dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 1662dffd3267SBarry Smith bs_max = is_n; 166388e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 1664dffd3267SBarry Smith } else { 1665bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 166688e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 1667dffd3267SBarry Smith } 1668b97a56abSSatish Balay ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1669bea157c4SSatish Balay 1670bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1671bea157c4SSatish Balay row = rows[j]; 167277431f27SBarry Smith if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1673bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1674bea157c4SSatish Balay aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1675dffd3267SBarry Smith if (sizes[i] == bs && !baij->keepzeroedrows) { 1676bea157c4SSatish Balay if (diag) { 1677bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 1678bea157c4SSatish Balay baij->ilen[row/bs] = 1; 1679bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 1680bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1681a07cd24cSSatish Balay } 1682563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 1683bea157c4SSatish Balay for (k=0; k<bs; k++) { 1684bea157c4SSatish Balay ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1685bea157c4SSatish Balay } 1686bea157c4SSatish Balay } else { /* (!diag) */ 1687bea157c4SSatish Balay baij->ilen[row/bs] = 0; 1688bea157c4SSatish Balay } /* end (!diag) */ 1689bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 1690aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 1691634064b4SBarry Smith if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1692bea157c4SSatish Balay #endif 1693bea157c4SSatish Balay for (k=0; k<count; k++) { 1694d9b7c43dSSatish Balay aa[0] = zero; 1695d9b7c43dSSatish Balay aa += bs; 1696d9b7c43dSSatish Balay } 1697d9b7c43dSSatish Balay if (diag) { 1698f830108cSBarry Smith ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1699d9b7c43dSSatish Balay } 1700d9b7c43dSSatish Balay } 1701bea157c4SSatish Balay } 1702bea157c4SSatish Balay 1703606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 17049a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17053a40ed3dSBarry Smith PetscFunctionReturn(0); 1706d9b7c43dSSatish Balay } 17071c351548SSatish Balay 17084a2ae208SSatish Balay #undef __FUNCT__ 17094a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 1710c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 17112d61bbb3SSatish Balay { 17122d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1713e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1714c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1715521d7252SBarry Smith PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 17166849ba73SBarry Smith PetscErrorCode ierr; 1717c1ac3661SBarry Smith PetscInt ridx,cidx,bs2=a->bs2; 1718273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 17193f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 17202d61bbb3SSatish Balay 17212d61bbb3SSatish Balay PetscFunctionBegin; 17222d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 17232d61bbb3SSatish Balay row = im[k]; brow = row/bs; 17245ef9f2a5SBarry Smith if (row < 0) continue; 17252515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 172677431f27SBarry Smith if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 17272d61bbb3SSatish Balay #endif 17282d61bbb3SSatish Balay rp = aj + ai[brow]; 17292d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 17302d61bbb3SSatish Balay rmax = imax[brow]; 17312d61bbb3SSatish Balay nrow = ailen[brow]; 17322d61bbb3SSatish Balay low = 0; 1733c71e6ed7SBarry Smith high = nrow; 17342d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 17355ef9f2a5SBarry Smith if (in[l] < 0) continue; 17362515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 173777431f27SBarry Smith if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 17382d61bbb3SSatish Balay #endif 17392d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 17402d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 17412d61bbb3SSatish Balay if (roworiented) { 17425ef9f2a5SBarry Smith value = v[l + k*n]; 17432d61bbb3SSatish Balay } else { 17442d61bbb3SSatish Balay value = v[k + l*m]; 17452d61bbb3SSatish Balay } 1746c71e6ed7SBarry Smith if (col < lastcol) low = 0; else high = nrow; 1747e2ee6c50SBarry Smith lastcol = col; 17482d61bbb3SSatish Balay while (high-low > 7) { 17492d61bbb3SSatish Balay t = (low+high)/2; 17502d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 17512d61bbb3SSatish Balay else low = t; 17522d61bbb3SSatish Balay } 17532d61bbb3SSatish Balay for (i=low; i<high; i++) { 17542d61bbb3SSatish Balay if (rp[i] > bcol) break; 17552d61bbb3SSatish Balay if (rp[i] == bcol) { 17562d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 17572d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 17582d61bbb3SSatish Balay else *bap = value; 17592d61bbb3SSatish Balay goto noinsert1; 17602d61bbb3SSatish Balay } 17612d61bbb3SSatish Balay } 17622d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 176377431f27SBarry Smith else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 17642d61bbb3SSatish Balay if (nrow >= rmax) { 17652d61bbb3SSatish Balay /* there is no extra room in row, therefore enlarge */ 1766c1ac3661SBarry Smith PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 17673f1db9ecSBarry Smith MatScalar *new_a; 17682d61bbb3SSatish Balay 176977431f27SBarry Smith if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 17702d61bbb3SSatish Balay 17712d61bbb3SSatish Balay /* Malloc new storage space */ 1772c1ac3661SBarry Smith len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1773b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1774c1ac3661SBarry Smith new_j = (PetscInt*)(new_a + bs2*new_nz); 17752d61bbb3SSatish Balay new_i = new_j + new_nz; 17762d61bbb3SSatish Balay 17772d61bbb3SSatish Balay /* copy over old data into new slots */ 17782d61bbb3SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 17792d61bbb3SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1780c1ac3661SBarry Smith ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 17812d61bbb3SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1782c1ac3661SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1783549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1784549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1785549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 17862d61bbb3SSatish Balay /* free up old matrix storage */ 1787606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 1788606d414cSSatish Balay if (!a->singlemalloc) { 1789606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 1790606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 1791606d414cSSatish Balay } 17922d61bbb3SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1793c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 17942d61bbb3SSatish Balay 17952d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 17962d61bbb3SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 179752e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 17982d61bbb3SSatish Balay a->maxnz += bs2*CHUNKSIZE; 17992d61bbb3SSatish Balay a->reallocs++; 18002d61bbb3SSatish Balay a->nz++; 18012d61bbb3SSatish Balay } 18022d61bbb3SSatish Balay N = nrow++ - 1; 18032d61bbb3SSatish Balay /* shift up all the later entries in this row */ 18042d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 18052d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 1806549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 18072d61bbb3SSatish Balay } 1808549d3d68SSatish Balay if (N>=i) { 1809549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1810549d3d68SSatish Balay } 18112d61bbb3SSatish Balay rp[i] = bcol; 18122d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 18132d61bbb3SSatish Balay noinsert1:; 18142d61bbb3SSatish Balay low = i; 18152d61bbb3SSatish Balay } 18162d61bbb3SSatish Balay ailen[brow] = nrow; 18172d61bbb3SSatish Balay } 181888e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 18192d61bbb3SSatish Balay PetscFunctionReturn(0); 18202d61bbb3SSatish Balay } 18212d61bbb3SSatish Balay 18222d61bbb3SSatish Balay 18234a2ae208SSatish Balay #undef __FUNCT__ 18244a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1825dfbe8321SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 18262d61bbb3SSatish Balay { 18272d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 18282d61bbb3SSatish Balay Mat outA; 1829dfbe8321SBarry Smith PetscErrorCode ierr; 1830667159a5SBarry Smith PetscTruth row_identity,col_identity; 18312d61bbb3SSatish Balay 18322d61bbb3SSatish Balay PetscFunctionBegin; 1833d3d32019SBarry Smith if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1834667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1835667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1836667159a5SBarry Smith if (!row_identity || !col_identity) { 1837634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1838667159a5SBarry Smith } 18392d61bbb3SSatish Balay 18402d61bbb3SSatish Balay outA = inA; 18412d61bbb3SSatish Balay inA->factor = FACTOR_LU; 18422d61bbb3SSatish Balay 18432d61bbb3SSatish Balay if (!a->diag) { 1844c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 18452d61bbb3SSatish Balay } 1846cf242676SKris Buschelman 1847c38d4ed2SBarry Smith a->row = row; 1848c38d4ed2SBarry Smith a->col = col; 1849c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1850c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1851c38d4ed2SBarry Smith 1852c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 18534c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 185452e6d16bSBarry Smith ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1855c38d4ed2SBarry Smith 1856cf242676SKris Buschelman /* 1857cf242676SKris Buschelman Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1858cf242676SKris Buschelman for ILU(0) factorization with natural ordering 1859cf242676SKris Buschelman */ 1860521d7252SBarry Smith if (inA->bs < 8) { 1861cf242676SKris Buschelman ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1862cf242676SKris Buschelman } else { 1863c38d4ed2SBarry Smith if (!a->solve_work) { 1864521d7252SBarry Smith ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 186552e6d16bSBarry Smith ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1866c38d4ed2SBarry Smith } 18672d61bbb3SSatish Balay } 18682d61bbb3SSatish Balay 1869af281ebdSHong Zhang ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1870667159a5SBarry Smith 18712d61bbb3SSatish Balay PetscFunctionReturn(0); 18722d61bbb3SSatish Balay } 18734a2ae208SSatish Balay #undef __FUNCT__ 18744a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1875dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1876ba4ca20aSSatish Balay { 1877c38d4ed2SBarry Smith static PetscTruth called = PETSC_FALSE; 1878ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1879dfbe8321SBarry Smith PetscErrorCode ierr; 1880ba4ca20aSSatish Balay 18813a40ed3dSBarry Smith PetscFunctionBegin; 1882c38d4ed2SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1883d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1884d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 18853a40ed3dSBarry Smith PetscFunctionReturn(0); 1886ba4ca20aSSatish Balay } 1887d9b7c43dSSatish Balay 1888fb2e594dSBarry Smith EXTERN_C_BEGIN 18894a2ae208SSatish Balay #undef __FUNCT__ 18904a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1891c1ac3661SBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 189227a8da17SBarry Smith { 189327a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1894c1ac3661SBarry Smith PetscInt i,nz,nbs; 189527a8da17SBarry Smith 189627a8da17SBarry Smith PetscFunctionBegin; 189714db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 189814db4f74SSatish Balay nbs = baij->nbs; 189927a8da17SBarry Smith for (i=0; i<nz; i++) { 190027a8da17SBarry Smith baij->j[i] = indices[i]; 190127a8da17SBarry Smith } 190227a8da17SBarry Smith baij->nz = nz; 190314db4f74SSatish Balay for (i=0; i<nbs; i++) { 190427a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 190527a8da17SBarry Smith } 190627a8da17SBarry Smith 190727a8da17SBarry Smith PetscFunctionReturn(0); 190827a8da17SBarry Smith } 1909fb2e594dSBarry Smith EXTERN_C_END 191027a8da17SBarry Smith 19114a2ae208SSatish Balay #undef __FUNCT__ 19124a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 191327a8da17SBarry Smith /*@ 191427a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 191527a8da17SBarry Smith in the matrix. 191627a8da17SBarry Smith 191727a8da17SBarry Smith Input Parameters: 191827a8da17SBarry Smith + mat - the SeqBAIJ matrix 191927a8da17SBarry Smith - indices - the column indices 192027a8da17SBarry Smith 192115091d37SBarry Smith Level: advanced 192215091d37SBarry Smith 192327a8da17SBarry Smith Notes: 192427a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 192527a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 192627a8da17SBarry Smith of the MatSetValues() operation. 192727a8da17SBarry Smith 192827a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 1929d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 193027a8da17SBarry Smith 193127a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 193227a8da17SBarry Smith 193327a8da17SBarry Smith @*/ 1934c1ac3661SBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 193527a8da17SBarry Smith { 1936c1ac3661SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt *); 193727a8da17SBarry Smith 193827a8da17SBarry Smith PetscFunctionBegin; 19394482741eSBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 19404482741eSBarry Smith PetscValidPointer(indices,2); 1941c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 194227a8da17SBarry Smith if (f) { 194327a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 194427a8da17SBarry Smith } else { 1945634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 194627a8da17SBarry Smith } 194727a8da17SBarry Smith PetscFunctionReturn(0); 194827a8da17SBarry Smith } 194927a8da17SBarry Smith 19504a2ae208SSatish Balay #undef __FUNCT__ 19514a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1952dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1953273d9f13SBarry Smith { 1954273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1955dfbe8321SBarry Smith PetscErrorCode ierr; 1956c1ac3661SBarry Smith PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1957273d9f13SBarry Smith PetscReal atmp; 195887828ca2SBarry Smith PetscScalar *x,zero = 0.0; 1959273d9f13SBarry Smith MatScalar *aa; 1960c1ac3661SBarry Smith PetscInt ncols,brow,krow,kcol; 1961273d9f13SBarry Smith 1962273d9f13SBarry Smith PetscFunctionBegin; 1963273d9f13SBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1964521d7252SBarry Smith bs = A->bs; 1965273d9f13SBarry Smith aa = a->a; 1966273d9f13SBarry Smith ai = a->i; 1967273d9f13SBarry Smith aj = a->j; 1968273d9f13SBarry Smith mbs = a->mbs; 1969273d9f13SBarry Smith 1970273d9f13SBarry Smith ierr = VecSet(&zero,v);CHKERRQ(ierr); 19711ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1972273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1973273d9f13SBarry Smith if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1974273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1975273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 1976273d9f13SBarry Smith brow = bs*i; 1977273d9f13SBarry Smith for (j=0; j<ncols; j++){ 1978273d9f13SBarry Smith /* bcol = bs*(*aj); */ 1979273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 1980273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 1981273d9f13SBarry Smith atmp = PetscAbsScalar(*aa); aa++; 1982273d9f13SBarry Smith row = brow + krow; /* row index */ 1983273d9f13SBarry Smith /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1984273d9f13SBarry Smith if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1985273d9f13SBarry Smith } 1986273d9f13SBarry Smith } 1987273d9f13SBarry Smith aj++; 1988273d9f13SBarry Smith } 1989273d9f13SBarry Smith } 19901ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1991273d9f13SBarry Smith PetscFunctionReturn(0); 1992273d9f13SBarry Smith } 1993273d9f13SBarry Smith 19944a2ae208SSatish Balay #undef __FUNCT__ 19954a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1996dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1997273d9f13SBarry Smith { 1998dfbe8321SBarry Smith PetscErrorCode ierr; 1999273d9f13SBarry Smith 2000273d9f13SBarry Smith PetscFunctionBegin; 2001273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 2002273d9f13SBarry Smith PetscFunctionReturn(0); 2003273d9f13SBarry Smith } 2004273d9f13SBarry Smith 20054a2ae208SSatish Balay #undef __FUNCT__ 20064a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 2007dfbe8321SBarry Smith PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2008f2a5309cSSatish Balay { 2009f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2010f2a5309cSSatish Balay PetscFunctionBegin; 2011f2a5309cSSatish Balay *array = a->a; 2012f2a5309cSSatish Balay PetscFunctionReturn(0); 2013f2a5309cSSatish Balay } 2014f2a5309cSSatish Balay 20154a2ae208SSatish Balay #undef __FUNCT__ 20164a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2017dfbe8321SBarry Smith PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2018f2a5309cSSatish Balay { 2019f2a5309cSSatish Balay PetscFunctionBegin; 2020f2a5309cSSatish Balay PetscFunctionReturn(0); 2021f2a5309cSSatish Balay } 2022f2a5309cSSatish Balay 202342ee4b1aSHong Zhang #include "petscblaslapack.h" 202442ee4b1aSHong Zhang #undef __FUNCT__ 202542ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ" 2026dfbe8321SBarry Smith PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 202742ee4b1aSHong Zhang { 202842ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2029dfbe8321SBarry Smith PetscErrorCode ierr; 2030521d7252SBarry Smith PetscInt i,bs=Y->bs,j,bs2; 20314ce68768SBarry Smith PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 203242ee4b1aSHong Zhang 203342ee4b1aSHong Zhang PetscFunctionBegin; 203442ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 203571044d3cSBarry Smith BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2036c537a176SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2037c4319e64SHong Zhang if (y->xtoy && y->XtoY != X) { 2038c4319e64SHong Zhang ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2039c4319e64SHong Zhang ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2040c537a176SHong Zhang } 2041c4319e64SHong Zhang if (!y->xtoy) { /* get xtoy */ 2042c4319e64SHong Zhang ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2043c4319e64SHong Zhang y->XtoY = X; 2044c537a176SHong Zhang } 2045c4319e64SHong Zhang bs2 = bs*bs; 2046c537a176SHong Zhang for (i=0; i<x->nz; i++) { 2047c4319e64SHong Zhang j = 0; 2048c4319e64SHong Zhang while (j < bs2){ 20496550863bSHong Zhang y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2050c4319e64SHong Zhang j++; 2051c537a176SHong Zhang } 2052c4319e64SHong Zhang } 205363ba0a88SBarry Smith ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 205442ee4b1aSHong Zhang } else { 205542ee4b1aSHong Zhang ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 205642ee4b1aSHong Zhang } 205742ee4b1aSHong Zhang PetscFunctionReturn(0); 205842ee4b1aSHong Zhang } 205942ee4b1aSHong Zhang 20602593348eSBarry Smith /* -------------------------------------------------------------------*/ 2061cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2062cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2063cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2064cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 206597304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 20667c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 20677c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2068cc2dc46cSBarry Smith MatSolve_SeqBAIJ_N, 2069cc2dc46cSBarry Smith 0, 2070cc2dc46cSBarry Smith 0, 207197304618SKris Buschelman /*10*/ 0, 2072cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2073cc2dc46cSBarry Smith 0, 2074b6490206SBarry Smith 0, 2075f2501298SSatish Balay MatTranspose_SeqBAIJ, 207697304618SKris Buschelman /*15*/ MatGetInfo_SeqBAIJ, 2077cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2078cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2079cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2080cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 208197304618SKris Buschelman /*20*/ 0, 2082cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2083cc2dc46cSBarry Smith 0, 2084cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2085cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 208697304618SKris Buschelman /*25*/ MatZeroRows_SeqBAIJ, 2087cc2dc46cSBarry Smith MatLUFactorSymbolic_SeqBAIJ, 2088cc2dc46cSBarry Smith MatLUFactorNumeric_SeqBAIJ_N, 2089c05c3958SHong Zhang MatCholeskyFactorSymbolic_SeqBAIJ, 2090c05c3958SHong Zhang MatCholeskyFactorNumeric_SeqBAIJ_N, 209197304618SKris Buschelman /*30*/ MatSetUpPreallocation_SeqBAIJ, 2092cc2dc46cSBarry Smith MatILUFactorSymbolic_SeqBAIJ, 2093c05c3958SHong Zhang MatICCFactorSymbolic_SeqBAIJ, 2094f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 2095f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 209697304618SKris Buschelman /*35*/ MatDuplicate_SeqBAIJ, 2097cc2dc46cSBarry Smith 0, 2098cc2dc46cSBarry Smith 0, 2099cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2100cc2dc46cSBarry Smith 0, 210197304618SKris Buschelman /*40*/ MatAXPY_SeqBAIJ, 2102cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 2103cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2104cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 2105cc2dc46cSBarry Smith 0, 210697304618SKris Buschelman /*45*/ MatPrintHelp_SeqBAIJ, 2107cc2dc46cSBarry Smith MatScale_SeqBAIJ, 2108cc2dc46cSBarry Smith 0, 2109cc2dc46cSBarry Smith 0, 2110cc2dc46cSBarry Smith 0, 2111521d7252SBarry Smith /*50*/ 0, 21123b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 211392c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 2114cc2dc46cSBarry Smith 0, 2115cc2dc46cSBarry Smith 0, 211697304618SKris Buschelman /*55*/ 0, 2117cc2dc46cSBarry Smith 0, 2118cc2dc46cSBarry Smith 0, 2119cc2dc46cSBarry Smith 0, 2120d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 212197304618SKris Buschelman /*60*/ MatGetSubMatrix_SeqBAIJ, 2122b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2123b9b97703SBarry Smith MatView_SeqBAIJ, 21243a64cc32SBarry Smith MatGetPetscMaps_Petsc, 2125273d9f13SBarry Smith 0, 212697304618SKris Buschelman /*65*/ 0, 2127273d9f13SBarry Smith 0, 2128273d9f13SBarry Smith 0, 2129273d9f13SBarry Smith 0, 2130273d9f13SBarry Smith 0, 213197304618SKris Buschelman /*70*/ MatGetRowMax_SeqBAIJ, 213297304618SKris Buschelman MatConvert_Basic, 2133273d9f13SBarry Smith 0, 213497304618SKris Buschelman 0, 213597304618SKris Buschelman 0, 213697304618SKris Buschelman /*75*/ 0, 213797304618SKris Buschelman 0, 213897304618SKris Buschelman 0, 213997304618SKris Buschelman 0, 214097304618SKris Buschelman 0, 214197304618SKris Buschelman /*80*/ 0, 214297304618SKris Buschelman 0, 214397304618SKris Buschelman 0, 214497304618SKris Buschelman 0, 2145865e5f61SKris Buschelman MatLoad_SeqBAIJ, 2146865e5f61SKris Buschelman /*85*/ 0, 2147b01c7715SBarry Smith 0, 2148b01c7715SBarry Smith 0, 2149b01c7715SBarry Smith 0, 2150865e5f61SKris Buschelman 0, 2151865e5f61SKris Buschelman /*90*/ 0, 2152865e5f61SKris Buschelman 0, 2153865e5f61SKris Buschelman 0, 2154865e5f61SKris Buschelman 0, 2155865e5f61SKris Buschelman 0, 2156865e5f61SKris Buschelman /*95*/ 0, 2157865e5f61SKris Buschelman 0, 2158865e5f61SKris Buschelman 0, 2159865e5f61SKris Buschelman 0}; 21602593348eSBarry Smith 21613e90b805SBarry Smith EXTERN_C_BEGIN 21624a2ae208SSatish Balay #undef __FUNCT__ 21634a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2164dfbe8321SBarry Smith PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 21653e90b805SBarry Smith { 21663e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2167521d7252SBarry Smith PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2168dfbe8321SBarry Smith PetscErrorCode ierr; 21693e90b805SBarry Smith 21703e90b805SBarry Smith PetscFunctionBegin; 21713e90b805SBarry Smith if (aij->nonew != 1) { 2172634064b4SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 21733e90b805SBarry Smith } 21743e90b805SBarry Smith 21753e90b805SBarry Smith /* allocate space for values if not already there */ 21763e90b805SBarry Smith if (!aij->saved_values) { 217787828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 21783e90b805SBarry Smith } 21793e90b805SBarry Smith 21803e90b805SBarry Smith /* copy values over */ 218187828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 21823e90b805SBarry Smith PetscFunctionReturn(0); 21833e90b805SBarry Smith } 21843e90b805SBarry Smith EXTERN_C_END 21853e90b805SBarry Smith 21863e90b805SBarry Smith EXTERN_C_BEGIN 21874a2ae208SSatish Balay #undef __FUNCT__ 21884a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2189dfbe8321SBarry Smith PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 21903e90b805SBarry Smith { 21913e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 21926849ba73SBarry Smith PetscErrorCode ierr; 2193521d7252SBarry Smith PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 21943e90b805SBarry Smith 21953e90b805SBarry Smith PetscFunctionBegin; 21963e90b805SBarry Smith if (aij->nonew != 1) { 2197634064b4SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 21983e90b805SBarry Smith } 21993e90b805SBarry Smith if (!aij->saved_values) { 2200634064b4SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 22013e90b805SBarry Smith } 22023e90b805SBarry Smith 22033e90b805SBarry Smith /* copy values over */ 220487828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 22053e90b805SBarry Smith PetscFunctionReturn(0); 22063e90b805SBarry Smith } 22073e90b805SBarry Smith EXTERN_C_END 22083e90b805SBarry Smith 2209273d9f13SBarry Smith EXTERN_C_BEGIN 2210ceb03754SKris Buschelman extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 2211ceb03754SKris Buschelman extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*); 2212273d9f13SBarry Smith EXTERN_C_END 2213273d9f13SBarry Smith 2214273d9f13SBarry Smith EXTERN_C_BEGIN 22154a2ae208SSatish Balay #undef __FUNCT__ 2216a23d5eceSKris Buschelman #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2217c1ac3661SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2218a23d5eceSKris Buschelman { 2219a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 22206849ba73SBarry Smith PetscErrorCode ierr; 2221c1ac3661SBarry Smith PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2222a23d5eceSKris Buschelman PetscTruth flg; 2223a23d5eceSKris Buschelman 2224a23d5eceSKris Buschelman PetscFunctionBegin; 2225a23d5eceSKris Buschelman 2226a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 2227a23d5eceSKris Buschelman ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2228a23d5eceSKris Buschelman if (nnz && newbs != bs) { 2229634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2230a23d5eceSKris Buschelman } 2231a23d5eceSKris Buschelman bs = newbs; 2232a23d5eceSKris Buschelman 2233a23d5eceSKris Buschelman mbs = B->m/bs; 2234a23d5eceSKris Buschelman nbs = B->n/bs; 2235a23d5eceSKris Buschelman bs2 = bs*bs; 2236a23d5eceSKris Buschelman 2237a23d5eceSKris Buschelman if (mbs*bs!=B->m || nbs*bs!=B->n) { 223877431f27SBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2239a23d5eceSKris Buschelman } 2240a23d5eceSKris Buschelman 2241a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 224277431f27SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2243a23d5eceSKris Buschelman if (nnz) { 2244a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { 224577431f27SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 224677431f27SBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2247a23d5eceSKris Buschelman } 2248a23d5eceSKris Buschelman } 2249a23d5eceSKris Buschelman 2250a23d5eceSKris Buschelman b = (Mat_SeqBAIJ*)B->data; 2251a23d5eceSKris Buschelman ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2252a23d5eceSKris Buschelman B->ops->solve = MatSolve_SeqBAIJ_Update; 2253a23d5eceSKris Buschelman B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2254a23d5eceSKris Buschelman if (!flg) { 2255a23d5eceSKris Buschelman switch (bs) { 2256a23d5eceSKris Buschelman case 1: 2257a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2258a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 2259a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2260a23d5eceSKris Buschelman break; 2261a23d5eceSKris Buschelman case 2: 2262a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2263a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 2264a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2265b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2266a23d5eceSKris Buschelman break; 2267a23d5eceSKris Buschelman case 3: 2268a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2269a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 2270a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2271b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2272a23d5eceSKris Buschelman break; 2273a23d5eceSKris Buschelman case 4: 2274a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2275a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 2276a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2277b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2278a23d5eceSKris Buschelman break; 2279a23d5eceSKris Buschelman case 5: 2280a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2281a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 2282a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2283b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2284a23d5eceSKris Buschelman break; 2285a23d5eceSKris Buschelman case 6: 2286a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2287a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 2288a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2289a23d5eceSKris Buschelman break; 2290a23d5eceSKris Buschelman case 7: 2291a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2292a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 2293a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2294a23d5eceSKris Buschelman break; 2295a23d5eceSKris Buschelman default: 2296a23d5eceSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2297a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 2298a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2299a23d5eceSKris Buschelman break; 2300a23d5eceSKris Buschelman } 2301a23d5eceSKris Buschelman } 2302521d7252SBarry Smith B->bs = bs; 2303a23d5eceSKris Buschelman b->mbs = mbs; 2304a23d5eceSKris Buschelman b->nbs = nbs; 2305c1ac3661SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2306a23d5eceSKris Buschelman if (!nnz) { 2307a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2308a23d5eceSKris Buschelman else if (nz <= 0) nz = 1; 2309a23d5eceSKris Buschelman for (i=0; i<mbs; i++) b->imax[i] = nz; 2310a23d5eceSKris Buschelman nz = nz*mbs; 2311a23d5eceSKris Buschelman } else { 2312a23d5eceSKris Buschelman nz = 0; 2313a23d5eceSKris Buschelman for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2314a23d5eceSKris Buschelman } 2315a23d5eceSKris Buschelman 2316a23d5eceSKris Buschelman /* allocate the matrix space */ 2317c1ac3661SBarry Smith len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2318a23d5eceSKris Buschelman ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2319a23d5eceSKris Buschelman ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2320c1ac3661SBarry Smith b->j = (PetscInt*)(b->a + nz*bs2); 2321c1ac3661SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2322a23d5eceSKris Buschelman b->i = b->j + nz; 2323a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 2324a23d5eceSKris Buschelman 2325a23d5eceSKris Buschelman b->i[0] = 0; 2326a23d5eceSKris Buschelman for (i=1; i<mbs+1; i++) { 2327a23d5eceSKris Buschelman b->i[i] = b->i[i-1] + b->imax[i-1]; 2328a23d5eceSKris Buschelman } 2329a23d5eceSKris Buschelman 2330a23d5eceSKris Buschelman /* b->ilen will count nonzeros in each block row so far. */ 2331c1ac3661SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 233252e6d16bSBarry Smith ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2333a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2334a23d5eceSKris Buschelman 2335521d7252SBarry Smith B->bs = bs; 2336a23d5eceSKris Buschelman b->bs2 = bs2; 2337a23d5eceSKris Buschelman b->mbs = mbs; 2338a23d5eceSKris Buschelman b->nz = 0; 2339a23d5eceSKris Buschelman b->maxnz = nz*bs2; 2340a23d5eceSKris Buschelman B->info.nz_unneeded = (PetscReal)b->maxnz; 2341a23d5eceSKris Buschelman PetscFunctionReturn(0); 2342a23d5eceSKris Buschelman } 2343a23d5eceSKris Buschelman EXTERN_C_END 2344a23d5eceSKris Buschelman 23450bad9183SKris Buschelman /*MC 2346fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 23470bad9183SKris Buschelman block sparse compressed row format. 23480bad9183SKris Buschelman 23490bad9183SKris Buschelman Options Database Keys: 23500bad9183SKris Buschelman . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 23510bad9183SKris Buschelman 23520bad9183SKris Buschelman Level: beginner 23530bad9183SKris Buschelman 23540bad9183SKris Buschelman .seealso: MatCreateSeqBAIJ 23550bad9183SKris Buschelman M*/ 23560bad9183SKris Buschelman 2357a23d5eceSKris Buschelman EXTERN_C_BEGIN 2358a23d5eceSKris Buschelman #undef __FUNCT__ 23594a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 2360dfbe8321SBarry Smith PetscErrorCode MatCreate_SeqBAIJ(Mat B) 23612593348eSBarry Smith { 2362dfbe8321SBarry Smith PetscErrorCode ierr; 2363c1ac3661SBarry Smith PetscMPIInt size; 2364b6490206SBarry Smith Mat_SeqBAIJ *b; 23653b2fbd54SBarry Smith 23663a40ed3dSBarry Smith PetscFunctionBegin; 2367273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 236829bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2369b6490206SBarry Smith 2370273d9f13SBarry Smith B->m = B->M = PetscMax(B->m,B->M); 2371273d9f13SBarry Smith B->n = B->N = PetscMax(B->n,B->N); 2372b0a32e0cSBarry Smith ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2373b0a32e0cSBarry Smith B->data = (void*)b; 2374549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 23752593348eSBarry Smith B->factor = 0; 237690f02eecSBarry Smith B->mapping = 0; 23772593348eSBarry Smith b->row = 0; 23782593348eSBarry Smith b->col = 0; 2379e51c0b9cSSatish Balay b->icol = 0; 23802593348eSBarry Smith b->reallocs = 0; 23813e90b805SBarry Smith b->saved_values = 0; 2382cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2383563b5814SBarry Smith b->setvalueslen = 0; 2384563b5814SBarry Smith b->setvaluescopy = PETSC_NULL; 2385563b5814SBarry Smith #endif 23862593348eSBarry Smith 23873a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 23883a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2389a5ae1ecdSBarry Smith 2390c4992f7dSBarry Smith b->sorted = PETSC_FALSE; 2391c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 23922593348eSBarry Smith b->nonew = 0; 23932593348eSBarry Smith b->diag = 0; 23942593348eSBarry Smith b->solve_work = 0; 2395de6a44a3SBarry Smith b->mult_work = 0; 23962a1b7f2aSHong Zhang B->spptr = 0; 23970e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 2398883fce79SBarry Smith b->keepzeroedrows = PETSC_FALSE; 2399c4319e64SHong Zhang b->xtoy = 0; 2400c4319e64SHong Zhang b->XtoY = 0; 240173e7a558SHong Zhang b->compressedrow.use = PETSC_FALSE; 240226e093fcSHong Zhang b->compressedrow.nrows = 0; 240373e7a558SHong Zhang b->compressedrow.i = PETSC_NULL; 240473e7a558SHong Zhang b->compressedrow.rindex = PETSC_NULL; 240573e7a558SHong Zhang b->compressedrow.checked = PETSC_FALSE; 240688e51ccdSHong Zhang B->same_nonzero = PETSC_FALSE; 24074e220ebcSLois Curfman McInnes 2408*43516a2dSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2409*43516a2dSKris Buschelman "MatInvertBlockDiagonal_SeqBAIJ", 2410*43516a2dSKris Buschelman MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2411f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 24123e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 2413bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2414f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 24153e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 2416bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2417f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 241827a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2419bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2420a6175056SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2421273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 2422273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2423b24ad042SBarry Smith #if !defined(PETSC_USE_64BIT_INT) 2424a0e1a404SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2425a0e1a404SHong Zhang "MatConvert_SeqBAIJ_SeqSBAIJ", 2426a0e1a404SHong Zhang MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2427b24ad042SBarry Smith #endif 2428a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2429a23d5eceSKris Buschelman "MatSeqBAIJSetPreallocation_SeqBAIJ", 2430a23d5eceSKris Buschelman MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 24313a40ed3dSBarry Smith PetscFunctionReturn(0); 24322593348eSBarry Smith } 2433273d9f13SBarry Smith EXTERN_C_END 24342593348eSBarry Smith 24354a2ae208SSatish Balay #undef __FUNCT__ 24364a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2437dfbe8321SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 24382593348eSBarry Smith { 24392593348eSBarry Smith Mat C; 2440b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 24416849ba73SBarry Smith PetscErrorCode ierr; 2442c1ac3661SBarry Smith PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2443de6a44a3SBarry Smith 24443a40ed3dSBarry Smith PetscFunctionBegin; 244529bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 24462593348eSBarry Smith 24472593348eSBarry Smith *B = 0; 2448273d9f13SBarry Smith ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2449be5d1d56SKris Buschelman ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 24501d5dac46SHong Zhang ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2451273d9f13SBarry Smith c = (Mat_SeqBAIJ*)C->data; 245244cd7ae7SLois Curfman McInnes 24534beb1cfeSHong Zhang C->M = A->M; 24544beb1cfeSHong Zhang C->N = A->N; 2455521d7252SBarry Smith C->bs = A->bs; 24569df24120SSatish Balay c->bs2 = a->bs2; 2457b6490206SBarry Smith c->mbs = a->mbs; 2458b6490206SBarry Smith c->nbs = a->nbs; 24592593348eSBarry Smith 2460c1ac3661SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2461690b6cddSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2462b6490206SBarry Smith for (i=0; i<mbs; i++) { 24632593348eSBarry Smith c->imax[i] = a->imax[i]; 24642593348eSBarry Smith c->ilen[i] = a->ilen[i]; 24652593348eSBarry Smith } 24662593348eSBarry Smith 24672593348eSBarry Smith /* allocate the matrix space */ 2468c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 2469c1ac3661SBarry Smith len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2470b0a32e0cSBarry Smith ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2471690b6cddSBarry Smith c->j = (PetscInt*)(c->a + nz*bs2); 2472de6a44a3SBarry Smith c->i = c->j + nz; 2473c1ac3661SBarry Smith ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2474b6490206SBarry Smith if (mbs > 0) { 2475c1ac3661SBarry Smith ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 24762e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 2477549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 24782e8a6d31SBarry Smith } else { 2479549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 24802593348eSBarry Smith } 24812593348eSBarry Smith } 24822593348eSBarry Smith 248352e6d16bSBarry Smith ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 24842593348eSBarry Smith c->sorted = a->sorted; 24852593348eSBarry Smith c->roworiented = a->roworiented; 24862593348eSBarry Smith c->nonew = a->nonew; 24872593348eSBarry Smith 24882593348eSBarry Smith if (a->diag) { 2489c1ac3661SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 249052e6d16bSBarry Smith ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2491b6490206SBarry Smith for (i=0; i<mbs; i++) { 24922593348eSBarry Smith c->diag[i] = a->diag[i]; 24932593348eSBarry Smith } 249498305bb5SBarry Smith } else c->diag = 0; 24952593348eSBarry Smith c->nz = a->nz; 24962593348eSBarry Smith c->maxnz = a->maxnz; 24972593348eSBarry Smith c->solve_work = 0; 24987fc0212eSBarry Smith c->mult_work = 0; 2499273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 2500273d9f13SBarry Smith C->assembled = PETSC_TRUE; 250188e51ccdSHong Zhang 250288e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 250388e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 250488e51ccdSHong Zhang c->compressedrow.checked = a->compressedrow.checked; 250588e51ccdSHong Zhang if ( a->compressedrow.checked && a->compressedrow.use){ 250688e51ccdSHong Zhang i = a->compressedrow.nrows; 250788e51ccdSHong Zhang ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 250888e51ccdSHong Zhang c->compressedrow.rindex = c->compressedrow.i + i + 1; 250988e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 251088e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 251188e51ccdSHong Zhang } else { 251288e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 251388e51ccdSHong Zhang c->compressedrow.i = PETSC_NULL; 251488e51ccdSHong Zhang c->compressedrow.rindex = PETSC_NULL; 251588e51ccdSHong Zhang } 251688e51ccdSHong Zhang C->same_nonzero = A->same_nonzero; 25172593348eSBarry Smith *B = C; 2518b0a32e0cSBarry Smith ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 25193a40ed3dSBarry Smith PetscFunctionReturn(0); 25202593348eSBarry Smith } 25212593348eSBarry Smith 25224a2ae208SSatish Balay #undef __FUNCT__ 25234a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 2524dfbe8321SBarry Smith PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 25252593348eSBarry Smith { 2526b6490206SBarry Smith Mat_SeqBAIJ *a; 25272593348eSBarry Smith Mat B; 25286849ba73SBarry Smith PetscErrorCode ierr; 2529b24ad042SBarry Smith PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2530c1ac3661SBarry Smith PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2531c1ac3661SBarry Smith PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2532c1ac3661SBarry Smith PetscInt *masked,nmask,tmp,bs2,ishift; 2533b24ad042SBarry Smith PetscMPIInt size; 2534b24ad042SBarry Smith int fd; 253587828ca2SBarry Smith PetscScalar *aa; 253619bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 25372593348eSBarry Smith 25383a40ed3dSBarry Smith PetscFunctionBegin; 2539b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2540de6a44a3SBarry Smith bs2 = bs*bs; 2541b6490206SBarry Smith 2542d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254329bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2544b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 25450752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2546552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 25472593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 25482593348eSBarry Smith 2549d64ed03dSBarry Smith if (header[3] < 0) { 255029bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2551d64ed03dSBarry Smith } 2552d64ed03dSBarry Smith 255329bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 255435aab85fSBarry Smith 255535aab85fSBarry Smith /* 255635aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 255735aab85fSBarry Smith divisible by the blocksize 255835aab85fSBarry Smith */ 2559b6490206SBarry Smith mbs = M/bs; 256035aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 256135aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 256235aab85fSBarry Smith else mbs++; 256335aab85fSBarry Smith if (extra_rows) { 256463ba0a88SBarry Smith ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 256535aab85fSBarry Smith } 2566b6490206SBarry Smith 25672593348eSBarry Smith /* read in row lengths */ 2568c1ac3661SBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 25690752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 257035aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 25712593348eSBarry Smith 2572b6490206SBarry Smith /* read in column indices */ 2573c1ac3661SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 25740752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 257535aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2576b6490206SBarry Smith 2577b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2578c1ac3661SBarry Smith ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2579c1ac3661SBarry Smith ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2580c1ac3661SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2581c1ac3661SBarry Smith ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 258235aab85fSBarry Smith masked = mask + mbs; 2583b6490206SBarry Smith rowcount = 0; nzcount = 0; 2584b6490206SBarry Smith for (i=0; i<mbs; i++) { 258535aab85fSBarry Smith nmask = 0; 2586b6490206SBarry Smith for (j=0; j<bs; j++) { 2587b6490206SBarry Smith kmax = rowlengths[rowcount]; 2588b6490206SBarry Smith for (k=0; k<kmax; k++) { 258935aab85fSBarry Smith tmp = jj[nzcount++]/bs; 259035aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2591b6490206SBarry Smith } 2592b6490206SBarry Smith rowcount++; 2593b6490206SBarry Smith } 259435aab85fSBarry Smith browlengths[i] += nmask; 259535aab85fSBarry Smith /* zero out the mask elements we set */ 259635aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2597b6490206SBarry Smith } 2598b6490206SBarry Smith 25992593348eSBarry Smith /* create our matrix */ 2600dd23797bSKris Buschelman ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 260178ae41b4SKris Buschelman ierr = MatSetType(B,type);CHKERRQ(ierr); 260278ae41b4SKris Buschelman ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2603b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 26042593348eSBarry Smith 26052593348eSBarry Smith /* set matrix "i" values */ 2606de6a44a3SBarry Smith a->i[0] = 0; 2607b6490206SBarry Smith for (i=1; i<= mbs; i++) { 2608b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2609b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 26102593348eSBarry Smith } 2611b6490206SBarry Smith a->nz = 0; 2612b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 26132593348eSBarry Smith 2614b6490206SBarry Smith /* read in nonzero values */ 261587828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 26160752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 261735aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2618b6490206SBarry Smith 2619b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2620b6490206SBarry Smith nzcount = 0; jcount = 0; 2621b6490206SBarry Smith for (i=0; i<mbs; i++) { 2622b6490206SBarry Smith nzcountb = nzcount; 262335aab85fSBarry Smith nmask = 0; 2624b6490206SBarry Smith for (j=0; j<bs; j++) { 2625b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2626b6490206SBarry Smith for (k=0; k<kmax; k++) { 262735aab85fSBarry Smith tmp = jj[nzcount++]/bs; 262835aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2629b6490206SBarry Smith } 2630b6490206SBarry Smith } 2631de6a44a3SBarry Smith /* sort the masked values */ 2632433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2633de6a44a3SBarry Smith 2634b6490206SBarry Smith /* set "j" values into matrix */ 2635b6490206SBarry Smith maskcount = 1; 263635aab85fSBarry Smith for (j=0; j<nmask; j++) { 263735aab85fSBarry Smith a->j[jcount++] = masked[j]; 2638de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2639b6490206SBarry Smith } 2640b6490206SBarry Smith /* set "a" values into matrix */ 2641de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2642b6490206SBarry Smith for (j=0; j<bs; j++) { 2643b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2644b6490206SBarry Smith for (k=0; k<kmax; k++) { 2645de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2646de6a44a3SBarry Smith block = mask[tmp] - 1; 2647de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2648de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2649375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 2650b6490206SBarry Smith } 2651b6490206SBarry Smith } 265235aab85fSBarry Smith /* zero out the mask elements we set */ 265335aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2654b6490206SBarry Smith } 265529bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2656b6490206SBarry Smith 2657606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2658606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 2659606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 2660606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 2661606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 2662b6490206SBarry Smith 266378ae41b4SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 266478ae41b4SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 26659c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 266678ae41b4SKris Buschelman 266778ae41b4SKris Buschelman *A = B; 26683a40ed3dSBarry Smith PetscFunctionReturn(0); 26692593348eSBarry Smith } 26702593348eSBarry Smith 26714a2ae208SSatish Balay #undef __FUNCT__ 26724a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 2673273d9f13SBarry Smith /*@C 2674273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2675273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 2676273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 2677273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 2678273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 26792593348eSBarry Smith 2680273d9f13SBarry Smith Collective on MPI_Comm 2681273d9f13SBarry Smith 2682273d9f13SBarry Smith Input Parameters: 2683273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 2684273d9f13SBarry Smith . bs - size of block 2685273d9f13SBarry Smith . m - number of rows 2686273d9f13SBarry Smith . n - number of columns 268735d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 268835d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 2689273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 2690273d9f13SBarry Smith 2691273d9f13SBarry Smith Output Parameter: 2692273d9f13SBarry Smith . A - the matrix 2693273d9f13SBarry Smith 2694273d9f13SBarry Smith Options Database Keys: 2695273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2696273d9f13SBarry Smith block calculations (much slower) 2697273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2698273d9f13SBarry Smith 2699273d9f13SBarry Smith Level: intermediate 2700273d9f13SBarry Smith 2701273d9f13SBarry Smith Notes: 2702d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 2703d1be2dadSMatthew Knepley 270449a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 270549a6f317SBarry Smith 270635d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 270735d8aa7fSBarry Smith 2708273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 2709273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 2710273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 2711273d9f13SBarry Smith 2712273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 2713273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2714273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 2715273d9f13SBarry Smith matrices. 2716273d9f13SBarry Smith 2717273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2718273d9f13SBarry Smith @*/ 2719c1ac3661SBarry Smith PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2720273d9f13SBarry Smith { 2721dfbe8321SBarry Smith PetscErrorCode ierr; 2722273d9f13SBarry Smith 2723273d9f13SBarry Smith PetscFunctionBegin; 2724273d9f13SBarry Smith ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2725273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2726273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2727273d9f13SBarry Smith PetscFunctionReturn(0); 2728273d9f13SBarry Smith } 2729273d9f13SBarry Smith 27304a2ae208SSatish Balay #undef __FUNCT__ 27314a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2732273d9f13SBarry Smith /*@C 2733273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2734273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 2735273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 2736273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 2737273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 2738273d9f13SBarry Smith 2739273d9f13SBarry Smith Collective on MPI_Comm 2740273d9f13SBarry Smith 2741273d9f13SBarry Smith Input Parameters: 2742273d9f13SBarry Smith + A - the matrix 2743273d9f13SBarry Smith . bs - size of block 2744273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 2745273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 2746273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 2747273d9f13SBarry Smith 2748273d9f13SBarry Smith Options Database Keys: 2749273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2750273d9f13SBarry Smith block calculations (much slower) 2751273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2752273d9f13SBarry Smith 2753273d9f13SBarry Smith Level: intermediate 2754273d9f13SBarry Smith 2755273d9f13SBarry Smith Notes: 275649a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 275749a6f317SBarry Smith 2758273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 2759273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 2760273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 2761273d9f13SBarry Smith 2762273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 2763273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2764273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 2765273d9f13SBarry Smith matrices. 2766273d9f13SBarry Smith 2767273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2768273d9f13SBarry Smith @*/ 2769c1ac3661SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2770273d9f13SBarry Smith { 2771c1ac3661SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2772273d9f13SBarry Smith 2773273d9f13SBarry Smith PetscFunctionBegin; 2774a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2775a23d5eceSKris Buschelman if (f) { 2776a23d5eceSKris Buschelman ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2777273d9f13SBarry Smith } 2778273d9f13SBarry Smith PetscFunctionReturn(0); 2779273d9f13SBarry Smith } 2780a1d92eedSBarry Smith 2781