1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 233 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 250 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 251 i2 = 3; 252 idiag += 9; 253 for (i=1; i<m; i++) { 254 v = aa + 9*ai[i]; 255 vi = aj + ai[i]; 256 nz = diag[i] - ai[i]; 257 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 258 while (nz--) { 259 idx = 3*(*vi++); 260 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 261 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 262 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 263 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 264 v += 9; 265 } 266 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 267 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 268 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 269 idiag += 9; 270 i2 += 3; 271 } 272 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 273 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 274 } 275 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 276 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 277 i2 = 0; 278 mdiag = a->idiag+9*a->mbs; 279 for (i=0; i<m; i++) { 280 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 281 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 282 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 283 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 284 mdiag += 9; 285 i2 += 3; 286 } 287 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 288 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 289 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 290 } 291 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 292 idiag = a->idiag+9*a->mbs - 9; 293 i2 = 3*m - 3; 294 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 295 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 296 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 297 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 298 idiag -= 9; 299 i2 -= 3; 300 for (i=m-2; i>=0; i--) { 301 v = aa + 9*(diag[i]+1); 302 vi = aj + diag[i] + 1; 303 nz = ai[i+1] - diag[i] - 1; 304 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 305 while (nz--) { 306 idx = 3*(*vi++); 307 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 308 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 309 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 310 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 311 v += 9; 312 } 313 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 314 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 315 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 316 idiag -= 9; 317 i2 -= 3; 318 } 319 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 320 } 321 } else { 322 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 323 } 324 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 325 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 331 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 332 { 333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 334 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 335 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 336 PetscErrorCode ierr; 337 PetscInt m = a->mbs,i,i2,nz,idx; 338 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 339 340 PetscFunctionBegin; 341 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 342 its = its*lits; 343 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 344 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 345 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 346 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 347 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 348 349 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 350 351 diag = a->diag; 352 idiag = a->idiag; 353 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 355 356 if (flag & SOR_ZERO_INITIAL_GUESS) { 357 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 358 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 359 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 360 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 361 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 362 i2 = 4; 363 idiag += 16; 364 for (i=1; i<m; i++) { 365 v = aa + 16*ai[i]; 366 vi = aj + ai[i]; 367 nz = diag[i] - ai[i]; 368 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 369 while (nz--) { 370 idx = 4*(*vi++); 371 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 372 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 373 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 374 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 375 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 376 v += 16; 377 } 378 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 379 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 380 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 381 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 382 idiag += 16; 383 i2 += 4; 384 } 385 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 386 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 387 } 388 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 389 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 390 i2 = 0; 391 mdiag = a->idiag+16*a->mbs; 392 for (i=0; i<m; i++) { 393 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 394 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 395 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 396 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 397 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 398 mdiag += 16; 399 i2 += 4; 400 } 401 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 402 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 403 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 404 } 405 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 406 idiag = a->idiag+16*a->mbs - 16; 407 i2 = 4*m - 4; 408 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 409 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 410 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 411 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 412 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 413 idiag -= 16; 414 i2 -= 4; 415 for (i=m-2; i>=0; i--) { 416 v = aa + 16*(diag[i]+1); 417 vi = aj + diag[i] + 1; 418 nz = ai[i+1] - diag[i] - 1; 419 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 420 while (nz--) { 421 idx = 4*(*vi++); 422 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 423 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 424 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 425 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 426 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 427 v += 16; 428 } 429 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 430 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 431 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 432 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 433 idiag -= 16; 434 i2 -= 4; 435 } 436 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 437 } 438 } else { 439 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 440 } 441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 442 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 448 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 449 { 450 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 451 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 452 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 453 PetscErrorCode ierr; 454 PetscInt m = a->mbs,i,i2,nz,idx; 455 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 456 457 PetscFunctionBegin; 458 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 459 its = its*lits; 460 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 461 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 462 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 463 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 464 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 465 466 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 467 468 diag = a->diag; 469 idiag = a->idiag; 470 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 471 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 472 473 if (flag & SOR_ZERO_INITIAL_GUESS) { 474 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 475 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 476 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 477 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 478 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 479 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 480 i2 = 5; 481 idiag += 25; 482 for (i=1; i<m; i++) { 483 v = aa + 25*ai[i]; 484 vi = aj + ai[i]; 485 nz = diag[i] - ai[i]; 486 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 487 while (nz--) { 488 idx = 5*(*vi++); 489 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 490 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 491 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 492 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 493 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 494 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 495 v += 25; 496 } 497 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 498 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 499 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 500 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 501 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 502 idiag += 25; 503 i2 += 5; 504 } 505 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 507 } 508 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510 i2 = 0; 511 mdiag = a->idiag+25*a->mbs; 512 for (i=0; i<m; i++) { 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 514 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 515 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 516 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 517 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 518 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 519 mdiag += 25; 520 i2 += 5; 521 } 522 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 523 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 524 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 525 } 526 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 527 idiag = a->idiag+25*a->mbs - 25; 528 i2 = 5*m - 5; 529 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 530 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 531 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 532 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 533 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 534 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 535 idiag -= 25; 536 i2 -= 5; 537 for (i=m-2; i>=0; i--) { 538 v = aa + 25*(diag[i]+1); 539 vi = aj + diag[i] + 1; 540 nz = ai[i+1] - diag[i] - 1; 541 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 542 while (nz--) { 543 idx = 5*(*vi++); 544 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 545 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 546 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 547 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 548 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 549 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 550 v += 25; 551 } 552 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 553 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 554 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 555 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 556 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 557 idiag -= 25; 558 i2 -= 5; 559 } 560 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 561 } 562 } else { 563 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 564 } 565 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 566 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 /* 571 Special version for direct calls from Fortran (Used in PETSc-fun3d) 572 */ 573 #if defined(PETSC_HAVE_FORTRAN_CAPS) 574 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 575 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 576 #define matsetvaluesblocked4_ matsetvaluesblocked4 577 #endif 578 579 EXTERN_C_BEGIN 580 #undef __FUNCT__ 581 #define __FUNCT__ "matsetvaluesblocked4_" 582 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 583 { 584 Mat A = *AA; 585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 586 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 587 PetscInt *ai=a->i,*ailen=a->ilen; 588 PetscInt *aj=a->j,stepval,lastcol = -1; 589 const PetscScalar *value = v; 590 MatScalar *ap,*aa = a->a,*bap; 591 592 PetscFunctionBegin; 593 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 594 stepval = (n-1)*4; 595 for (k=0; k<m; k++) { /* loop over added rows */ 596 row = im[k]; 597 rp = aj + ai[row]; 598 ap = aa + 16*ai[row]; 599 nrow = ailen[row]; 600 low = 0; 601 high = nrow; 602 for (l=0; l<n; l++) { /* loop over added columns */ 603 col = in[l]; 604 if (col <= lastcol) low = 0; else high = nrow; 605 lastcol = col; 606 value = v + k*(stepval+4 + l)*4; 607 while (high-low > 7) { 608 t = (low+high)/2; 609 if (rp[t] > col) high = t; 610 else low = t; 611 } 612 for (i=low; i<high; i++) { 613 if (rp[i] > col) break; 614 if (rp[i] == col) { 615 bap = ap + 16*i; 616 for (ii=0; ii<4; ii++,value+=stepval) { 617 for (jj=ii; jj<16; jj+=4) { 618 bap[jj] += *value++; 619 } 620 } 621 goto noinsert2; 622 } 623 } 624 N = nrow++ - 1; 625 high++; /* added new column index thus must search to one higher than before */ 626 /* shift up all the later entries in this row */ 627 for (ii=N; ii>=i; ii--) { 628 rp[ii+1] = rp[ii]; 629 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 630 } 631 if (N >= i) { 632 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 633 } 634 rp[i] = col; 635 bap = ap + 16*i; 636 for (ii=0; ii<4; ii++,value+=stepval) { 637 for (jj=ii; jj<16; jj+=4) { 638 bap[jj] = *value++; 639 } 640 } 641 noinsert2:; 642 low = i; 643 } 644 ailen[row] = nrow; 645 } 646 PetscFunctionReturnVoid(); 647 } 648 EXTERN_C_END 649 650 #if defined(PETSC_HAVE_FORTRAN_CAPS) 651 #define matsetvalues4_ MATSETVALUES4 652 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 653 #define matsetvalues4_ matsetvalues4 654 #endif 655 656 EXTERN_C_BEGIN 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatSetValues4_" 659 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 660 { 661 Mat A = *AA; 662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 663 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 664 PetscInt *ai=a->i,*ailen=a->ilen; 665 PetscInt *aj=a->j,brow,bcol; 666 PetscInt ridx,cidx,lastcol = -1; 667 MatScalar *ap,value,*aa=a->a,*bap; 668 669 PetscFunctionBegin; 670 for (k=0; k<m; k++) { /* loop over added rows */ 671 row = im[k]; brow = row/4; 672 rp = aj + ai[brow]; 673 ap = aa + 16*ai[brow]; 674 nrow = ailen[brow]; 675 low = 0; 676 high = nrow; 677 for (l=0; l<n; l++) { /* loop over added columns */ 678 col = in[l]; bcol = col/4; 679 ridx = row % 4; cidx = col % 4; 680 value = v[l + k*n]; 681 if (col <= lastcol) low = 0; else high = nrow; 682 lastcol = col; 683 while (high-low > 7) { 684 t = (low+high)/2; 685 if (rp[t] > bcol) high = t; 686 else low = t; 687 } 688 for (i=low; i<high; i++) { 689 if (rp[i] > bcol) break; 690 if (rp[i] == bcol) { 691 bap = ap + 16*i + 4*cidx + ridx; 692 *bap += value; 693 goto noinsert1; 694 } 695 } 696 N = nrow++ - 1; 697 high++; /* added new column thus must search to one higher than before */ 698 /* shift up all the later entries in this row */ 699 for (ii=N; ii>=i; ii--) { 700 rp[ii+1] = rp[ii]; 701 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 702 } 703 if (N>=i) { 704 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 705 } 706 rp[i] = bcol; 707 ap[16*i + 4*cidx + ridx] = value; 708 noinsert1:; 709 low = i; 710 } 711 ailen[brow] = nrow; 712 } 713 PetscFunctionReturnVoid(); 714 } 715 EXTERN_C_END 716 717 /* UGLY, ugly, ugly 718 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 719 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 720 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 721 converts the entries into single precision and then calls ..._MatScalar() to put them 722 into the single precision data structures. 723 */ 724 #if defined(PETSC_USE_MAT_SINGLE) 725 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 726 #else 727 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 728 #endif 729 730 #define CHUNKSIZE 10 731 732 /* 733 Checks for missing diagonals 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 737 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 738 { 739 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 740 PetscErrorCode ierr; 741 PetscInt *diag,*jj = a->j,i; 742 743 PetscFunctionBegin; 744 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 745 *missing = PETSC_FALSE; 746 diag = a->diag; 747 for (i=0; i<a->mbs; i++) { 748 if (jj[diag[i]] != i) { 749 *missing = PETSC_TRUE; 750 if (d) *d = i; 751 } 752 } 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 758 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 759 { 760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 761 PetscErrorCode ierr; 762 PetscInt i,j,m = a->mbs; 763 764 PetscFunctionBegin; 765 if (!a->diag) { 766 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 767 } 768 for (i=0; i<m; i++) { 769 a->diag[i] = a->i[i+1]; 770 for (j=a->i[i]; j<a->i[i+1]; j++) { 771 if (a->j[j] == i) { 772 a->diag[i] = j; 773 break; 774 } 775 } 776 } 777 PetscFunctionReturn(0); 778 } 779 780 781 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 785 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 786 { 787 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 788 PetscErrorCode ierr; 789 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1; 790 PetscInt *tia, *tja; 791 792 PetscFunctionBegin; 793 *nn = n; 794 if (!ia) PetscFunctionReturn(0); 795 if (symmetric) { 796 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 797 } else { 798 tia = a->i; tja = a->j; 799 } 800 801 if (!blockcompressed && bs > 1) { 802 (*nn) *= bs; 803 nbs = bs; 804 /* malloc & create the natural set of indices */ 805 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 806 (*ia)[0] = 0; 807 for (j=1; j<bs; j++) { 808 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 809 } 810 811 for (i=1; i<n; i++) { 812 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 813 for (j=1; j<bs; j++) { 814 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 815 } 816 } 817 818 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 819 820 if (ja) SETERRQ(PETSC_ERR_SUP,"Cannot provide uncompressed row indices"); 821 n *= bs; 822 nz *= bs*bs; 823 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 824 ierr = PetscFree(tia);CHKERRQ(ierr); 825 ierr = PetscFree(tja);CHKERRQ(ierr); 826 } 827 } else { 828 *ia = tia; 829 if (ja) *ja = tja; 830 } 831 if (oshift == 1) { 832 for (i=0; i<n+nbs; i++) (*ia)[i]++; 833 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 834 } 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 840 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 841 { 842 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 843 PetscErrorCode ierr; 844 PetscInt i,n = a->mbs,nz = a->i[n]; 845 846 PetscFunctionBegin; 847 if (!ia) PetscFunctionReturn(0); 848 if (!blockcompressed && A->rmap.bs > 1) { 849 ierr = PetscFree(*ia);CHKERRQ(ierr); 850 } else if (symmetric) { 851 ierr = PetscFree(*ia);CHKERRQ(ierr); 852 ierr = PetscFree(*ja);CHKERRQ(ierr); 853 } else if (oshift == 1) { /* blockcompressed */ 854 for (i=0; i<nz; i++) a->j[i]--; 855 for (i=0; i<n+1; i++) a->i[i]--; 856 } 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatDestroy_SeqBAIJ" 862 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 863 { 864 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 865 PetscErrorCode ierr; 866 867 PetscFunctionBegin; 868 #if defined(PETSC_USE_LOG) 869 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 870 #endif 871 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 872 if (a->row) { 873 ierr = ISDestroy(a->row);CHKERRQ(ierr); 874 } 875 if (a->col) { 876 ierr = ISDestroy(a->col);CHKERRQ(ierr); 877 } 878 ierr = PetscFree(a->diag);CHKERRQ(ierr); 879 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 880 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 881 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 882 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 883 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 884 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 885 #if defined(PETSC_USE_MAT_SINGLE) 886 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 887 #endif 888 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 889 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 890 891 ierr = PetscFree(a);CHKERRQ(ierr); 892 893 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 894 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 895 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 896 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 897 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 898 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 899 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 900 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatSetOption_SeqBAIJ" 906 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 907 { 908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 909 PetscErrorCode ierr; 910 911 PetscFunctionBegin; 912 switch (op) { 913 case MAT_ROW_ORIENTED: 914 a->roworiented = flg; 915 break; 916 case MAT_KEEP_ZEROED_ROWS: 917 a->keepzeroedrows = flg; 918 break; 919 case MAT_NEW_NONZERO_LOCATIONS: 920 a->nonew = (flg ? 0 : 1); 921 break; 922 case MAT_NEW_NONZERO_LOCATION_ERR: 923 a->nonew = (flg ? -1 : 0); 924 break; 925 case MAT_NEW_NONZERO_ALLOCATION_ERR: 926 a->nonew = (flg ? -2 : 0); 927 break; 928 case MAT_NEW_DIAGONALS: 929 case MAT_IGNORE_OFF_PROC_ENTRIES: 930 case MAT_USE_HASH_TABLE: 931 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 932 break; 933 case MAT_SYMMETRIC: 934 case MAT_STRUCTURALLY_SYMMETRIC: 935 case MAT_HERMITIAN: 936 case MAT_SYMMETRY_ETERNAL: 937 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 938 break; 939 default: 940 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 941 } 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatGetRow_SeqBAIJ" 947 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 948 { 949 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 950 PetscErrorCode ierr; 951 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 952 MatScalar *aa,*aa_i; 953 PetscScalar *v_i; 954 955 PetscFunctionBegin; 956 bs = A->rmap.bs; 957 ai = a->i; 958 aj = a->j; 959 aa = a->a; 960 bs2 = a->bs2; 961 962 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 963 964 bn = row/bs; /* Block number */ 965 bp = row % bs; /* Block Position */ 966 M = ai[bn+1] - ai[bn]; 967 *nz = bs*M; 968 969 if (v) { 970 *v = 0; 971 if (*nz) { 972 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 973 for (i=0; i<M; i++) { /* for each block in the block row */ 974 v_i = *v + i*bs; 975 aa_i = aa + bs2*(ai[bn] + i); 976 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 977 } 978 } 979 } 980 981 if (idx) { 982 *idx = 0; 983 if (*nz) { 984 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 985 for (i=0; i<M; i++) { /* for each block in the block row */ 986 idx_i = *idx + i*bs; 987 itmp = bs*aj[ai[bn] + i]; 988 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 989 } 990 } 991 } 992 PetscFunctionReturn(0); 993 } 994 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 997 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 998 { 999 PetscErrorCode ierr; 1000 1001 PetscFunctionBegin; 1002 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1003 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1009 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1010 { 1011 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1012 Mat C; 1013 PetscErrorCode ierr; 1014 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1015 PetscInt *rows,*cols,bs2=a->bs2; 1016 PetscScalar *array; 1017 1018 PetscFunctionBegin; 1019 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1020 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1021 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1022 1023 #if defined(PETSC_USE_MAT_SINGLE) 1024 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1025 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1026 #else 1027 array = a->a; 1028 #endif 1029 1030 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1031 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1032 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1033 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1034 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1035 ierr = PetscFree(col);CHKERRQ(ierr); 1036 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1037 cols = rows + bs; 1038 for (i=0; i<mbs; i++) { 1039 cols[0] = i*bs; 1040 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1041 len = ai[i+1] - ai[i]; 1042 for (j=0; j<len; j++) { 1043 rows[0] = (*aj++)*bs; 1044 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1045 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1046 array += bs2; 1047 } 1048 } 1049 ierr = PetscFree(rows);CHKERRQ(ierr); 1050 #if defined(PETSC_USE_MAT_SINGLE) 1051 ierr = PetscFree(array); 1052 #endif 1053 1054 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1055 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1056 1057 if (B) { 1058 *B = C; 1059 } else { 1060 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1067 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1068 { 1069 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1072 int fd; 1073 PetscScalar *aa; 1074 FILE *file; 1075 1076 PetscFunctionBegin; 1077 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1078 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1079 col_lens[0] = MAT_FILE_COOKIE; 1080 1081 col_lens[1] = A->rmap.N; 1082 col_lens[2] = A->cmap.n; 1083 col_lens[3] = a->nz*bs2; 1084 1085 /* store lengths of each row and write (including header) to file */ 1086 count = 0; 1087 for (i=0; i<a->mbs; i++) { 1088 for (j=0; j<bs; j++) { 1089 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1090 } 1091 } 1092 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1093 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1094 1095 /* store column indices (zero start index) */ 1096 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1097 count = 0; 1098 for (i=0; i<a->mbs; i++) { 1099 for (j=0; j<bs; j++) { 1100 for (k=a->i[i]; k<a->i[i+1]; k++) { 1101 for (l=0; l<bs; l++) { 1102 jj[count++] = bs*a->j[k] + l; 1103 } 1104 } 1105 } 1106 } 1107 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1108 ierr = PetscFree(jj);CHKERRQ(ierr); 1109 1110 /* store nonzero values */ 1111 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1112 count = 0; 1113 for (i=0; i<a->mbs; i++) { 1114 for (j=0; j<bs; j++) { 1115 for (k=a->i[i]; k<a->i[i+1]; k++) { 1116 for (l=0; l<bs; l++) { 1117 aa[count++] = a->a[bs2*k + l*bs + j]; 1118 } 1119 } 1120 } 1121 } 1122 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1123 ierr = PetscFree(aa);CHKERRQ(ierr); 1124 1125 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1126 if (file) { 1127 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1128 } 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1134 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1135 { 1136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1137 PetscErrorCode ierr; 1138 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1139 PetscViewerFormat format; 1140 1141 PetscFunctionBegin; 1142 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1143 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1144 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1145 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1146 Mat aij; 1147 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1148 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1149 ierr = MatDestroy(aij);CHKERRQ(ierr); 1150 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1151 PetscFunctionReturn(0); 1152 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1153 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1154 for (i=0; i<a->mbs; i++) { 1155 for (j=0; j<bs; j++) { 1156 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1157 for (k=a->i[i]; k<a->i[i+1]; k++) { 1158 for (l=0; l<bs; l++) { 1159 #if defined(PETSC_USE_COMPLEX) 1160 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1161 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1162 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1163 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1164 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1165 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1166 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1167 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1168 } 1169 #else 1170 if (a->a[bs2*k + l*bs + j] != 0.0) { 1171 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1172 } 1173 #endif 1174 } 1175 } 1176 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1177 } 1178 } 1179 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1180 } else { 1181 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1182 for (i=0; i<a->mbs; i++) { 1183 for (j=0; j<bs; j++) { 1184 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1185 for (k=a->i[i]; k<a->i[i+1]; k++) { 1186 for (l=0; l<bs; l++) { 1187 #if defined(PETSC_USE_COMPLEX) 1188 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1189 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1190 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1191 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1192 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1193 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1194 } else { 1195 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1196 } 1197 #else 1198 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1199 #endif 1200 } 1201 } 1202 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1203 } 1204 } 1205 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1206 } 1207 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1213 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1214 { 1215 Mat A = (Mat) Aa; 1216 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1219 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1220 MatScalar *aa; 1221 PetscViewer viewer; 1222 1223 PetscFunctionBegin; 1224 1225 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1226 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1227 1228 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1229 1230 /* loop over matrix elements drawing boxes */ 1231 color = PETSC_DRAW_BLUE; 1232 for (i=0,row=0; i<mbs; i++,row+=bs) { 1233 for (j=a->i[i]; j<a->i[i+1]; j++) { 1234 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1235 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1236 aa = a->a + j*bs2; 1237 for (k=0; k<bs; k++) { 1238 for (l=0; l<bs; l++) { 1239 if (PetscRealPart(*aa++) >= 0.) continue; 1240 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1241 } 1242 } 1243 } 1244 } 1245 color = PETSC_DRAW_CYAN; 1246 for (i=0,row=0; i<mbs; i++,row+=bs) { 1247 for (j=a->i[i]; j<a->i[i+1]; j++) { 1248 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1249 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1250 aa = a->a + j*bs2; 1251 for (k=0; k<bs; k++) { 1252 for (l=0; l<bs; l++) { 1253 if (PetscRealPart(*aa++) != 0.) continue; 1254 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1255 } 1256 } 1257 } 1258 } 1259 1260 color = PETSC_DRAW_RED; 1261 for (i=0,row=0; i<mbs; i++,row+=bs) { 1262 for (j=a->i[i]; j<a->i[i+1]; j++) { 1263 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1264 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1265 aa = a->a + j*bs2; 1266 for (k=0; k<bs; k++) { 1267 for (l=0; l<bs; l++) { 1268 if (PetscRealPart(*aa++) <= 0.) continue; 1269 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1270 } 1271 } 1272 } 1273 } 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1279 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1280 { 1281 PetscErrorCode ierr; 1282 PetscReal xl,yl,xr,yr,w,h; 1283 PetscDraw draw; 1284 PetscTruth isnull; 1285 1286 PetscFunctionBegin; 1287 1288 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1289 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1290 1291 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1292 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1293 xr += w; yr += h; xl = -w; yl = -h; 1294 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1295 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1296 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "MatView_SeqBAIJ" 1302 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1303 { 1304 PetscErrorCode ierr; 1305 PetscTruth iascii,isbinary,isdraw; 1306 1307 PetscFunctionBegin; 1308 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1309 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1310 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1311 if (iascii){ 1312 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1313 } else if (isbinary) { 1314 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1315 } else if (isdraw) { 1316 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1317 } else { 1318 Mat B; 1319 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1320 ierr = MatView(B,viewer);CHKERRQ(ierr); 1321 ierr = MatDestroy(B);CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1329 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1330 { 1331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1332 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1333 PetscInt *ai = a->i,*ailen = a->ilen; 1334 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1335 MatScalar *ap,*aa = a->a; 1336 1337 PetscFunctionBegin; 1338 for (k=0; k<m; k++) { /* loop over rows */ 1339 row = im[k]; brow = row/bs; 1340 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1341 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1342 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1343 nrow = ailen[brow]; 1344 for (l=0; l<n; l++) { /* loop over columns */ 1345 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1346 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1347 col = in[l] ; 1348 bcol = col/bs; 1349 cidx = col%bs; 1350 ridx = row%bs; 1351 high = nrow; 1352 low = 0; /* assume unsorted */ 1353 while (high-low > 5) { 1354 t = (low+high)/2; 1355 if (rp[t] > bcol) high = t; 1356 else low = t; 1357 } 1358 for (i=low; i<high; i++) { 1359 if (rp[i] > bcol) break; 1360 if (rp[i] == bcol) { 1361 *v++ = ap[bs2*i+bs*cidx+ridx]; 1362 goto finished; 1363 } 1364 } 1365 *v++ = 0.0; 1366 finished:; 1367 } 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #if defined(PETSC_USE_MAT_SINGLE) 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1375 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1376 { 1377 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1378 PetscErrorCode ierr; 1379 PetscInt i,N = m*n*b->bs2; 1380 MatScalar *vsingle; 1381 1382 PetscFunctionBegin; 1383 if (N > b->setvalueslen) { 1384 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1385 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1386 b->setvalueslen = N; 1387 } 1388 vsingle = b->setvaluescopy; 1389 for (i=0; i<N; i++) { 1390 vsingle[i] = v[i]; 1391 } 1392 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 #endif 1396 1397 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1400 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1401 { 1402 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1403 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1404 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1405 PetscErrorCode ierr; 1406 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1407 PetscTruth roworiented=a->roworiented; 1408 const MatScalar *value = v; 1409 MatScalar *ap,*aa = a->a,*bap; 1410 1411 PetscFunctionBegin; 1412 if (roworiented) { 1413 stepval = (n-1)*bs; 1414 } else { 1415 stepval = (m-1)*bs; 1416 } 1417 for (k=0; k<m; k++) { /* loop over added rows */ 1418 row = im[k]; 1419 if (row < 0) continue; 1420 #if defined(PETSC_USE_DEBUG) 1421 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1422 #endif 1423 rp = aj + ai[row]; 1424 ap = aa + bs2*ai[row]; 1425 rmax = imax[row]; 1426 nrow = ailen[row]; 1427 low = 0; 1428 high = nrow; 1429 for (l=0; l<n; l++) { /* loop over added columns */ 1430 if (in[l] < 0) continue; 1431 #if defined(PETSC_USE_DEBUG) 1432 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1433 #endif 1434 col = in[l]; 1435 if (roworiented) { 1436 value = v + k*(stepval+bs)*bs + l*bs; 1437 } else { 1438 value = v + l*(stepval+bs)*bs + k*bs; 1439 } 1440 if (col <= lastcol) low = 0; else high = nrow; 1441 lastcol = col; 1442 while (high-low > 7) { 1443 t = (low+high)/2; 1444 if (rp[t] > col) high = t; 1445 else low = t; 1446 } 1447 for (i=low; i<high; i++) { 1448 if (rp[i] > col) break; 1449 if (rp[i] == col) { 1450 bap = ap + bs2*i; 1451 if (roworiented) { 1452 if (is == ADD_VALUES) { 1453 for (ii=0; ii<bs; ii++,value+=stepval) { 1454 for (jj=ii; jj<bs2; jj+=bs) { 1455 bap[jj] += *value++; 1456 } 1457 } 1458 } else { 1459 for (ii=0; ii<bs; ii++,value+=stepval) { 1460 for (jj=ii; jj<bs2; jj+=bs) { 1461 bap[jj] = *value++; 1462 } 1463 } 1464 } 1465 } else { 1466 if (is == ADD_VALUES) { 1467 for (ii=0; ii<bs; ii++,value+=stepval) { 1468 for (jj=0; jj<bs; jj++) { 1469 *bap++ += *value++; 1470 } 1471 } 1472 } else { 1473 for (ii=0; ii<bs; ii++,value+=stepval) { 1474 for (jj=0; jj<bs; jj++) { 1475 *bap++ = *value++; 1476 } 1477 } 1478 } 1479 } 1480 goto noinsert2; 1481 } 1482 } 1483 if (nonew == 1) goto noinsert2; 1484 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1485 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1486 N = nrow++ - 1; high++; 1487 /* shift up all the later entries in this row */ 1488 for (ii=N; ii>=i; ii--) { 1489 rp[ii+1] = rp[ii]; 1490 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1491 } 1492 if (N >= i) { 1493 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1494 } 1495 rp[i] = col; 1496 bap = ap + bs2*i; 1497 if (roworiented) { 1498 for (ii=0; ii<bs; ii++,value+=stepval) { 1499 for (jj=ii; jj<bs2; jj+=bs) { 1500 bap[jj] = *value++; 1501 } 1502 } 1503 } else { 1504 for (ii=0; ii<bs; ii++,value+=stepval) { 1505 for (jj=0; jj<bs; jj++) { 1506 *bap++ = *value++; 1507 } 1508 } 1509 } 1510 noinsert2:; 1511 low = i; 1512 } 1513 ailen[row] = nrow; 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1520 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1521 { 1522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1523 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1524 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1525 PetscErrorCode ierr; 1526 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1527 MatScalar *aa = a->a,*ap; 1528 PetscReal ratio=0.6; 1529 1530 PetscFunctionBegin; 1531 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1532 1533 if (m) rmax = ailen[0]; 1534 for (i=1; i<mbs; i++) { 1535 /* move each row back by the amount of empty slots (fshift) before it*/ 1536 fshift += imax[i-1] - ailen[i-1]; 1537 rmax = PetscMax(rmax,ailen[i]); 1538 if (fshift) { 1539 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1540 N = ailen[i]; 1541 for (j=0; j<N; j++) { 1542 ip[j-fshift] = ip[j]; 1543 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1544 } 1545 } 1546 ai[i] = ai[i-1] + ailen[i-1]; 1547 } 1548 if (mbs) { 1549 fshift += imax[mbs-1] - ailen[mbs-1]; 1550 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1551 } 1552 /* reset ilen and imax for each row */ 1553 for (i=0; i<mbs; i++) { 1554 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1555 } 1556 a->nz = ai[mbs]; 1557 1558 /* diagonals may have moved, so kill the diagonal pointers */ 1559 a->idiagvalid = PETSC_FALSE; 1560 if (fshift && a->diag) { 1561 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1562 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1563 a->diag = 0; 1564 } 1565 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1566 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1567 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1568 a->reallocs = 0; 1569 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1570 1571 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1572 if (a->compressedrow.use){ 1573 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1574 } 1575 1576 A->same_nonzero = PETSC_TRUE; 1577 PetscFunctionReturn(0); 1578 } 1579 1580 /* 1581 This function returns an array of flags which indicate the locations of contiguous 1582 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1583 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1584 Assume: sizes should be long enough to hold all the values. 1585 */ 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1588 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1589 { 1590 PetscInt i,j,k,row; 1591 PetscTruth flg; 1592 1593 PetscFunctionBegin; 1594 for (i=0,j=0; i<n; j++) { 1595 row = idx[i]; 1596 if (row%bs!=0) { /* Not the begining of a block */ 1597 sizes[j] = 1; 1598 i++; 1599 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1600 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1601 i++; 1602 } else { /* Begining of the block, so check if the complete block exists */ 1603 flg = PETSC_TRUE; 1604 for (k=1; k<bs; k++) { 1605 if (row+k != idx[i+k]) { /* break in the block */ 1606 flg = PETSC_FALSE; 1607 break; 1608 } 1609 } 1610 if (flg) { /* No break in the bs */ 1611 sizes[j] = bs; 1612 i+= bs; 1613 } else { 1614 sizes[j] = 1; 1615 i++; 1616 } 1617 } 1618 } 1619 *bs_max = j; 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1625 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1626 { 1627 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1628 PetscErrorCode ierr; 1629 PetscInt i,j,k,count,*rows; 1630 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1631 PetscScalar zero = 0.0; 1632 MatScalar *aa; 1633 1634 PetscFunctionBegin; 1635 /* Make a copy of the IS and sort it */ 1636 /* allocate memory for rows,sizes */ 1637 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1638 sizes = rows + is_n; 1639 1640 /* copy IS values to rows, and sort them */ 1641 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1642 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1643 if (baij->keepzeroedrows) { 1644 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1645 bs_max = is_n; 1646 A->same_nonzero = PETSC_TRUE; 1647 } else { 1648 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1649 A->same_nonzero = PETSC_FALSE; 1650 } 1651 1652 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1653 row = rows[j]; 1654 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1655 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1656 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1657 if (sizes[i] == bs && !baij->keepzeroedrows) { 1658 if (diag != 0.0) { 1659 if (baij->ilen[row/bs] > 0) { 1660 baij->ilen[row/bs] = 1; 1661 baij->j[baij->i[row/bs]] = row/bs; 1662 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1663 } 1664 /* Now insert all the diagonal values for this bs */ 1665 for (k=0; k<bs; k++) { 1666 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1667 } 1668 } else { /* (diag == 0.0) */ 1669 baij->ilen[row/bs] = 0; 1670 } /* end (diag == 0.0) */ 1671 } else { /* (sizes[i] != bs) */ 1672 #if defined (PETSC_USE_DEBUG) 1673 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1674 #endif 1675 for (k=0; k<count; k++) { 1676 aa[0] = zero; 1677 aa += bs; 1678 } 1679 if (diag != 0.0) { 1680 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1681 } 1682 } 1683 } 1684 1685 ierr = PetscFree(rows);CHKERRQ(ierr); 1686 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1692 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1693 { 1694 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1695 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1696 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1697 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1698 PetscErrorCode ierr; 1699 PetscInt ridx,cidx,bs2=a->bs2; 1700 PetscTruth roworiented=a->roworiented; 1701 MatScalar *ap,value,*aa=a->a,*bap; 1702 1703 PetscFunctionBegin; 1704 for (k=0; k<m; k++) { /* loop over added rows */ 1705 row = im[k]; 1706 brow = row/bs; 1707 if (row < 0) continue; 1708 #if defined(PETSC_USE_DEBUG) 1709 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1710 #endif 1711 rp = aj + ai[brow]; 1712 ap = aa + bs2*ai[brow]; 1713 rmax = imax[brow]; 1714 nrow = ailen[brow]; 1715 low = 0; 1716 high = nrow; 1717 for (l=0; l<n; l++) { /* loop over added columns */ 1718 if (in[l] < 0) continue; 1719 #if defined(PETSC_USE_DEBUG) 1720 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1721 #endif 1722 col = in[l]; bcol = col/bs; 1723 ridx = row % bs; cidx = col % bs; 1724 if (roworiented) { 1725 value = v[l + k*n]; 1726 } else { 1727 value = v[k + l*m]; 1728 } 1729 if (col <= lastcol) low = 0; else high = nrow; 1730 lastcol = col; 1731 while (high-low > 7) { 1732 t = (low+high)/2; 1733 if (rp[t] > bcol) high = t; 1734 else low = t; 1735 } 1736 for (i=low; i<high; i++) { 1737 if (rp[i] > bcol) break; 1738 if (rp[i] == bcol) { 1739 bap = ap + bs2*i + bs*cidx + ridx; 1740 if (is == ADD_VALUES) *bap += value; 1741 else *bap = value; 1742 goto noinsert1; 1743 } 1744 } 1745 if (nonew == 1) goto noinsert1; 1746 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1747 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1748 N = nrow++ - 1; high++; 1749 /* shift up all the later entries in this row */ 1750 for (ii=N; ii>=i; ii--) { 1751 rp[ii+1] = rp[ii]; 1752 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1753 } 1754 if (N>=i) { 1755 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1756 } 1757 rp[i] = bcol; 1758 ap[bs2*i + bs*cidx + ridx] = value; 1759 a->nz++; 1760 noinsert1:; 1761 low = i; 1762 } 1763 ailen[brow] = nrow; 1764 } 1765 A->same_nonzero = PETSC_FALSE; 1766 PetscFunctionReturn(0); 1767 } 1768 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1772 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1773 { 1774 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1775 Mat outA; 1776 PetscErrorCode ierr; 1777 PetscTruth row_identity,col_identity; 1778 1779 PetscFunctionBegin; 1780 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1781 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1782 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1783 if (!row_identity || !col_identity) { 1784 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1785 } 1786 1787 outA = inA; 1788 inA->factor = FACTOR_LU; 1789 1790 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1791 1792 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1793 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1794 a->row = row; 1795 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1796 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1797 a->col = col; 1798 1799 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1800 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1801 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1802 1803 /* 1804 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1805 for ILU(0) factorization with natural ordering 1806 */ 1807 if (inA->rmap.bs < 8) { 1808 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1809 } else { 1810 if (!a->solve_work) { 1811 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1812 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1813 } 1814 } 1815 1816 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1817 1818 PetscFunctionReturn(0); 1819 } 1820 1821 EXTERN_C_BEGIN 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1824 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1825 { 1826 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1827 PetscInt i,nz,nbs; 1828 1829 PetscFunctionBegin; 1830 nz = baij->maxnz/baij->bs2; 1831 nbs = baij->nbs; 1832 for (i=0; i<nz; i++) { 1833 baij->j[i] = indices[i]; 1834 } 1835 baij->nz = nz; 1836 for (i=0; i<nbs; i++) { 1837 baij->ilen[i] = baij->imax[i]; 1838 } 1839 1840 PetscFunctionReturn(0); 1841 } 1842 EXTERN_C_END 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1846 /*@ 1847 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1848 in the matrix. 1849 1850 Input Parameters: 1851 + mat - the SeqBAIJ matrix 1852 - indices - the column indices 1853 1854 Level: advanced 1855 1856 Notes: 1857 This can be called if you have precomputed the nonzero structure of the 1858 matrix and want to provide it to the matrix object to improve the performance 1859 of the MatSetValues() operation. 1860 1861 You MUST have set the correct numbers of nonzeros per row in the call to 1862 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1863 1864 MUST be called before any calls to MatSetValues(); 1865 1866 @*/ 1867 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1868 { 1869 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1870 1871 PetscFunctionBegin; 1872 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1873 PetscValidPointer(indices,2); 1874 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1875 if (f) { 1876 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1877 } else { 1878 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1879 } 1880 PetscFunctionReturn(0); 1881 } 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1885 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1886 { 1887 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1888 PetscErrorCode ierr; 1889 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1890 PetscReal atmp; 1891 PetscScalar *x,zero = 0.0; 1892 MatScalar *aa; 1893 PetscInt ncols,brow,krow,kcol; 1894 1895 PetscFunctionBegin; 1896 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1897 bs = A->rmap.bs; 1898 aa = a->a; 1899 ai = a->i; 1900 aj = a->j; 1901 mbs = a->mbs; 1902 1903 ierr = VecSet(v,zero);CHKERRQ(ierr); 1904 if (idx) { 1905 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1906 } 1907 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1908 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1909 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1910 for (i=0; i<mbs; i++) { 1911 ncols = ai[1] - ai[0]; ai++; 1912 brow = bs*i; 1913 for (j=0; j<ncols; j++){ 1914 for (kcol=0; kcol<bs; kcol++){ 1915 for (krow=0; krow<bs; krow++){ 1916 atmp = PetscAbsScalar(*aa);aa++; 1917 row = brow + krow; /* row index */ 1918 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1919 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1920 } 1921 } 1922 aj++; 1923 } 1924 } 1925 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatCopy_SeqBAIJ" 1931 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1932 { 1933 PetscErrorCode ierr; 1934 1935 PetscFunctionBegin; 1936 /* If the two matrices have the same copy implementation, use fast copy. */ 1937 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1938 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1939 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1940 1941 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1942 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1943 } 1944 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1945 } else { 1946 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1953 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1954 { 1955 PetscErrorCode ierr; 1956 1957 PetscFunctionBegin; 1958 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1964 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1965 { 1966 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1967 PetscFunctionBegin; 1968 *array = a->a; 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1974 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #include "petscblaslapack.h" 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1983 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1984 { 1985 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1986 PetscErrorCode ierr; 1987 PetscInt i,bs=Y->rmap.bs,j,bs2; 1988 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1989 1990 PetscFunctionBegin; 1991 if (str == SAME_NONZERO_PATTERN) { 1992 PetscScalar alpha = a; 1993 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1994 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1995 if (y->xtoy && y->XtoY != X) { 1996 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1997 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1998 } 1999 if (!y->xtoy) { /* get xtoy */ 2000 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2001 y->XtoY = X; 2002 } 2003 bs2 = bs*bs; 2004 for (i=0; i<x->nz; i++) { 2005 j = 0; 2006 while (j < bs2){ 2007 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2008 j++; 2009 } 2010 } 2011 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2012 } else { 2013 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2020 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2021 { 2022 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2023 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2024 PetscScalar *aa = a->a; 2025 2026 PetscFunctionBegin; 2027 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2033 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2034 { 2035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2036 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2037 PetscScalar *aa = a->a; 2038 2039 PetscFunctionBegin; 2040 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 2045 /* -------------------------------------------------------------------*/ 2046 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2047 MatGetRow_SeqBAIJ, 2048 MatRestoreRow_SeqBAIJ, 2049 MatMult_SeqBAIJ_N, 2050 /* 4*/ MatMultAdd_SeqBAIJ_N, 2051 MatMultTranspose_SeqBAIJ, 2052 MatMultTransposeAdd_SeqBAIJ, 2053 MatSolve_SeqBAIJ_N, 2054 0, 2055 0, 2056 /*10*/ 0, 2057 MatLUFactor_SeqBAIJ, 2058 0, 2059 0, 2060 MatTranspose_SeqBAIJ, 2061 /*15*/ MatGetInfo_SeqBAIJ, 2062 MatEqual_SeqBAIJ, 2063 MatGetDiagonal_SeqBAIJ, 2064 MatDiagonalScale_SeqBAIJ, 2065 MatNorm_SeqBAIJ, 2066 /*20*/ 0, 2067 MatAssemblyEnd_SeqBAIJ, 2068 0, 2069 MatSetOption_SeqBAIJ, 2070 MatZeroEntries_SeqBAIJ, 2071 /*25*/ MatZeroRows_SeqBAIJ, 2072 MatLUFactorSymbolic_SeqBAIJ, 2073 MatLUFactorNumeric_SeqBAIJ_N, 2074 MatCholeskyFactorSymbolic_SeqBAIJ, 2075 MatCholeskyFactorNumeric_SeqBAIJ_N, 2076 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2077 MatILUFactorSymbolic_SeqBAIJ, 2078 MatICCFactorSymbolic_SeqBAIJ, 2079 MatGetArray_SeqBAIJ, 2080 MatRestoreArray_SeqBAIJ, 2081 /*35*/ MatDuplicate_SeqBAIJ, 2082 0, 2083 0, 2084 MatILUFactor_SeqBAIJ, 2085 0, 2086 /*40*/ MatAXPY_SeqBAIJ, 2087 MatGetSubMatrices_SeqBAIJ, 2088 MatIncreaseOverlap_SeqBAIJ, 2089 MatGetValues_SeqBAIJ, 2090 MatCopy_SeqBAIJ, 2091 /*45*/ 0, 2092 MatScale_SeqBAIJ, 2093 0, 2094 0, 2095 0, 2096 /*50*/ 0, 2097 MatGetRowIJ_SeqBAIJ, 2098 MatRestoreRowIJ_SeqBAIJ, 2099 0, 2100 0, 2101 /*55*/ 0, 2102 0, 2103 0, 2104 0, 2105 MatSetValuesBlocked_SeqBAIJ, 2106 /*60*/ MatGetSubMatrix_SeqBAIJ, 2107 MatDestroy_SeqBAIJ, 2108 MatView_SeqBAIJ, 2109 0, 2110 0, 2111 /*65*/ 0, 2112 0, 2113 0, 2114 0, 2115 0, 2116 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2117 MatConvert_Basic, 2118 0, 2119 0, 2120 0, 2121 /*75*/ 0, 2122 0, 2123 0, 2124 0, 2125 0, 2126 /*80*/ 0, 2127 0, 2128 0, 2129 0, 2130 MatLoad_SeqBAIJ, 2131 /*85*/ 0, 2132 0, 2133 0, 2134 0, 2135 0, 2136 /*90*/ 0, 2137 0, 2138 0, 2139 0, 2140 0, 2141 /*95*/ 0, 2142 0, 2143 0, 2144 0, 2145 0, 2146 /*100*/0, 2147 0, 2148 0, 2149 0, 2150 0, 2151 /*105*/0, 2152 MatRealPart_SeqBAIJ, 2153 MatImaginaryPart_SeqBAIJ, 2154 0, 2155 0, 2156 /*110*/0, 2157 0, 2158 0, 2159 0, 2160 MatMissingDiagonal_SeqBAIJ 2161 /*115*/ 2162 }; 2163 2164 EXTERN_C_BEGIN 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2167 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2168 { 2169 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2170 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2171 PetscErrorCode ierr; 2172 2173 PetscFunctionBegin; 2174 if (aij->nonew != 1) { 2175 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2176 } 2177 2178 /* allocate space for values if not already there */ 2179 if (!aij->saved_values) { 2180 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2181 } 2182 2183 /* copy values over */ 2184 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 EXTERN_C_END 2188 2189 EXTERN_C_BEGIN 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2192 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2193 { 2194 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2195 PetscErrorCode ierr; 2196 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2197 2198 PetscFunctionBegin; 2199 if (aij->nonew != 1) { 2200 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2201 } 2202 if (!aij->saved_values) { 2203 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2204 } 2205 2206 /* copy values over */ 2207 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 EXTERN_C_END 2211 2212 EXTERN_C_BEGIN 2213 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2214 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2215 EXTERN_C_END 2216 2217 EXTERN_C_BEGIN 2218 #undef __FUNCT__ 2219 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2220 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2221 { 2222 Mat_SeqBAIJ *b; 2223 PetscErrorCode ierr; 2224 PetscInt i,mbs,nbs,bs2,newbs = bs; 2225 PetscTruth flg,skipallocation = PETSC_FALSE; 2226 2227 PetscFunctionBegin; 2228 2229 if (nz == MAT_SKIP_ALLOCATION) { 2230 skipallocation = PETSC_TRUE; 2231 nz = 0; 2232 } 2233 2234 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2235 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2236 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2237 2238 if (nnz && newbs != bs) { 2239 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2240 } 2241 bs = newbs; 2242 2243 B->rmap.bs = B->cmap.bs = bs; 2244 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2245 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2246 2247 B->preallocated = PETSC_TRUE; 2248 2249 mbs = B->rmap.n/bs; 2250 nbs = B->cmap.n/bs; 2251 bs2 = bs*bs; 2252 2253 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2254 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2255 } 2256 2257 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2258 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2259 if (nnz) { 2260 for (i=0; i<mbs; i++) { 2261 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2262 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2263 } 2264 } 2265 2266 b = (Mat_SeqBAIJ*)B->data; 2267 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2268 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2269 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2270 2271 B->ops->solve = MatSolve_SeqBAIJ_Update; 2272 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2273 if (!flg) { 2274 switch (bs) { 2275 case 1: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2277 B->ops->mult = MatMult_SeqBAIJ_1; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2279 break; 2280 case 2: 2281 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2282 B->ops->mult = MatMult_SeqBAIJ_2; 2283 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2284 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2285 break; 2286 case 3: 2287 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2288 B->ops->mult = MatMult_SeqBAIJ_3; 2289 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2290 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2291 break; 2292 case 4: 2293 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2294 B->ops->mult = MatMult_SeqBAIJ_4; 2295 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2296 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2297 break; 2298 case 5: 2299 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2300 B->ops->mult = MatMult_SeqBAIJ_5; 2301 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2302 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2303 break; 2304 case 6: 2305 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2306 B->ops->mult = MatMult_SeqBAIJ_6; 2307 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2308 break; 2309 case 7: 2310 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2311 B->ops->mult = MatMult_SeqBAIJ_7; 2312 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2313 break; 2314 default: 2315 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2316 B->ops->mult = MatMult_SeqBAIJ_N; 2317 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2318 break; 2319 } 2320 } 2321 B->rmap.bs = bs; 2322 b->mbs = mbs; 2323 b->nbs = nbs; 2324 if (!skipallocation) { 2325 if (!b->imax) { 2326 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2327 } 2328 /* b->ilen will count nonzeros in each block row so far. */ 2329 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2330 if (!nnz) { 2331 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2332 else if (nz <= 0) nz = 1; 2333 for (i=0; i<mbs; i++) b->imax[i] = nz; 2334 nz = nz*mbs; 2335 } else { 2336 nz = 0; 2337 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2338 } 2339 2340 /* allocate the matrix space */ 2341 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2342 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2343 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2344 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2345 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2346 b->singlemalloc = PETSC_TRUE; 2347 b->i[0] = 0; 2348 for (i=1; i<mbs+1; i++) { 2349 b->i[i] = b->i[i-1] + b->imax[i-1]; 2350 } 2351 b->free_a = PETSC_TRUE; 2352 b->free_ij = PETSC_TRUE; 2353 } else { 2354 b->free_a = PETSC_FALSE; 2355 b->free_ij = PETSC_FALSE; 2356 } 2357 2358 B->rmap.bs = bs; 2359 b->bs2 = bs2; 2360 b->mbs = mbs; 2361 b->nz = 0; 2362 b->maxnz = nz*bs2; 2363 B->info.nz_unneeded = (PetscReal)b->maxnz; 2364 PetscFunctionReturn(0); 2365 } 2366 EXTERN_C_END 2367 2368 /*MC 2369 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2370 block sparse compressed row format. 2371 2372 Options Database Keys: 2373 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2374 2375 Level: beginner 2376 2377 .seealso: MatCreateSeqBAIJ() 2378 M*/ 2379 2380 EXTERN_C_BEGIN 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatCreate_SeqBAIJ" 2383 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2384 { 2385 PetscErrorCode ierr; 2386 PetscMPIInt size; 2387 Mat_SeqBAIJ *b; 2388 2389 PetscFunctionBegin; 2390 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2391 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2392 2393 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2394 B->data = (void*)b; 2395 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2396 B->factor = 0; 2397 B->mapping = 0; 2398 b->row = 0; 2399 b->col = 0; 2400 b->icol = 0; 2401 b->reallocs = 0; 2402 b->saved_values = 0; 2403 #if defined(PETSC_USE_MAT_SINGLE) 2404 b->setvalueslen = 0; 2405 b->setvaluescopy = PETSC_NULL; 2406 #endif 2407 2408 b->roworiented = PETSC_TRUE; 2409 b->nonew = 0; 2410 b->diag = 0; 2411 b->solve_work = 0; 2412 b->mult_work = 0; 2413 B->spptr = 0; 2414 B->info.nz_unneeded = (PetscReal)b->maxnz; 2415 b->keepzeroedrows = PETSC_FALSE; 2416 b->xtoy = 0; 2417 b->XtoY = 0; 2418 b->compressedrow.use = PETSC_FALSE; 2419 b->compressedrow.nrows = 0; 2420 b->compressedrow.i = PETSC_NULL; 2421 b->compressedrow.rindex = PETSC_NULL; 2422 b->compressedrow.checked = PETSC_FALSE; 2423 B->same_nonzero = PETSC_FALSE; 2424 2425 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2426 "MatInvertBlockDiagonal_SeqBAIJ", 2427 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2428 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2429 "MatStoreValues_SeqBAIJ", 2430 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2431 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2432 "MatRetrieveValues_SeqBAIJ", 2433 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2434 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2435 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2436 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2437 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2438 "MatConvert_SeqBAIJ_SeqAIJ", 2439 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2440 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2441 "MatConvert_SeqBAIJ_SeqSBAIJ", 2442 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2443 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2444 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2445 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2446 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 EXTERN_C_END 2450 2451 #undef __FUNCT__ 2452 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2453 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2454 { 2455 Mat C; 2456 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2457 PetscErrorCode ierr; 2458 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2459 2460 PetscFunctionBegin; 2461 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2462 2463 *B = 0; 2464 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2465 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2466 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2467 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2468 c = (Mat_SeqBAIJ*)C->data; 2469 2470 C->rmap.N = A->rmap.N; 2471 C->cmap.N = A->cmap.N; 2472 C->rmap.bs = A->rmap.bs; 2473 c->bs2 = a->bs2; 2474 c->mbs = a->mbs; 2475 c->nbs = a->nbs; 2476 2477 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2478 for (i=0; i<mbs; i++) { 2479 c->imax[i] = a->imax[i]; 2480 c->ilen[i] = a->ilen[i]; 2481 } 2482 2483 /* allocate the matrix space */ 2484 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2485 c->singlemalloc = PETSC_TRUE; 2486 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2487 if (mbs > 0) { 2488 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2489 if (cpvalues == MAT_COPY_VALUES) { 2490 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2491 } else { 2492 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2493 } 2494 } 2495 c->roworiented = a->roworiented; 2496 c->nonew = a->nonew; 2497 2498 if (a->diag) { 2499 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2500 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2501 for (i=0; i<mbs; i++) { 2502 c->diag[i] = a->diag[i]; 2503 } 2504 } else c->diag = 0; 2505 c->nz = a->nz; 2506 c->maxnz = a->maxnz; 2507 c->solve_work = 0; 2508 c->mult_work = 0; 2509 c->free_a = PETSC_TRUE; 2510 c->free_ij = PETSC_TRUE; 2511 C->preallocated = PETSC_TRUE; 2512 C->assembled = PETSC_TRUE; 2513 2514 c->compressedrow.use = a->compressedrow.use; 2515 c->compressedrow.nrows = a->compressedrow.nrows; 2516 c->compressedrow.checked = a->compressedrow.checked; 2517 if ( a->compressedrow.checked && a->compressedrow.use){ 2518 i = a->compressedrow.nrows; 2519 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2520 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2521 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2522 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2523 } else { 2524 c->compressedrow.use = PETSC_FALSE; 2525 c->compressedrow.i = PETSC_NULL; 2526 c->compressedrow.rindex = PETSC_NULL; 2527 } 2528 C->same_nonzero = A->same_nonzero; 2529 *B = C; 2530 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 #undef __FUNCT__ 2535 #define __FUNCT__ "MatLoad_SeqBAIJ" 2536 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2537 { 2538 Mat_SeqBAIJ *a; 2539 Mat B; 2540 PetscErrorCode ierr; 2541 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2542 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2543 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2544 PetscInt *masked,nmask,tmp,bs2,ishift; 2545 PetscMPIInt size; 2546 int fd; 2547 PetscScalar *aa; 2548 MPI_Comm comm = ((PetscObject)viewer)->comm; 2549 2550 PetscFunctionBegin; 2551 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2552 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2553 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2554 bs2 = bs*bs; 2555 2556 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2557 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2558 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2559 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2560 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2561 M = header[1]; N = header[2]; nz = header[3]; 2562 2563 if (header[3] < 0) { 2564 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2565 } 2566 2567 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2568 2569 /* 2570 This code adds extra rows to make sure the number of rows is 2571 divisible by the blocksize 2572 */ 2573 mbs = M/bs; 2574 extra_rows = bs - M + bs*(mbs); 2575 if (extra_rows == bs) extra_rows = 0; 2576 else mbs++; 2577 if (extra_rows) { 2578 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2579 } 2580 2581 /* read in row lengths */ 2582 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2583 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2584 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2585 2586 /* read in column indices */ 2587 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2588 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2589 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2590 2591 /* loop over row lengths determining block row lengths */ 2592 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2593 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2594 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2595 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2596 masked = mask + mbs; 2597 rowcount = 0; nzcount = 0; 2598 for (i=0; i<mbs; i++) { 2599 nmask = 0; 2600 for (j=0; j<bs; j++) { 2601 kmax = rowlengths[rowcount]; 2602 for (k=0; k<kmax; k++) { 2603 tmp = jj[nzcount++]/bs; 2604 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2605 } 2606 rowcount++; 2607 } 2608 browlengths[i] += nmask; 2609 /* zero out the mask elements we set */ 2610 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2611 } 2612 2613 /* create our matrix */ 2614 ierr = MatCreate(comm,&B); 2615 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2616 ierr = MatSetType(B,type);CHKERRQ(ierr); 2617 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2618 a = (Mat_SeqBAIJ*)B->data; 2619 2620 /* set matrix "i" values */ 2621 a->i[0] = 0; 2622 for (i=1; i<= mbs; i++) { 2623 a->i[i] = a->i[i-1] + browlengths[i-1]; 2624 a->ilen[i-1] = browlengths[i-1]; 2625 } 2626 a->nz = 0; 2627 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2628 2629 /* read in nonzero values */ 2630 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2631 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2632 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2633 2634 /* set "a" and "j" values into matrix */ 2635 nzcount = 0; jcount = 0; 2636 for (i=0; i<mbs; i++) { 2637 nzcountb = nzcount; 2638 nmask = 0; 2639 for (j=0; j<bs; j++) { 2640 kmax = rowlengths[i*bs+j]; 2641 for (k=0; k<kmax; k++) { 2642 tmp = jj[nzcount++]/bs; 2643 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2644 } 2645 } 2646 /* sort the masked values */ 2647 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2648 2649 /* set "j" values into matrix */ 2650 maskcount = 1; 2651 for (j=0; j<nmask; j++) { 2652 a->j[jcount++] = masked[j]; 2653 mask[masked[j]] = maskcount++; 2654 } 2655 /* set "a" values into matrix */ 2656 ishift = bs2*a->i[i]; 2657 for (j=0; j<bs; j++) { 2658 kmax = rowlengths[i*bs+j]; 2659 for (k=0; k<kmax; k++) { 2660 tmp = jj[nzcountb]/bs ; 2661 block = mask[tmp] - 1; 2662 point = jj[nzcountb] - bs*tmp; 2663 idx = ishift + bs2*block + j + bs*point; 2664 a->a[idx] = (MatScalar)aa[nzcountb++]; 2665 } 2666 } 2667 /* zero out the mask elements we set */ 2668 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2669 } 2670 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2671 2672 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2673 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2674 ierr = PetscFree(aa);CHKERRQ(ierr); 2675 ierr = PetscFree(jj);CHKERRQ(ierr); 2676 ierr = PetscFree(mask);CHKERRQ(ierr); 2677 2678 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 ierr = MatView_Private(B);CHKERRQ(ierr); 2681 2682 *A = B; 2683 PetscFunctionReturn(0); 2684 } 2685 2686 #undef __FUNCT__ 2687 #define __FUNCT__ "MatCreateSeqBAIJ" 2688 /*@C 2689 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2690 compressed row) format. For good matrix assembly performance the 2691 user should preallocate the matrix storage by setting the parameter nz 2692 (or the array nnz). By setting these parameters accurately, performance 2693 during matrix assembly can be increased by more than a factor of 50. 2694 2695 Collective on MPI_Comm 2696 2697 Input Parameters: 2698 + comm - MPI communicator, set to PETSC_COMM_SELF 2699 . bs - size of block 2700 . m - number of rows 2701 . n - number of columns 2702 . nz - number of nonzero blocks per block row (same for all rows) 2703 - nnz - array containing the number of nonzero blocks in the various block rows 2704 (possibly different for each block row) or PETSC_NULL 2705 2706 Output Parameter: 2707 . A - the matrix 2708 2709 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2710 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 2711 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 2712 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2713 2714 Options Database Keys: 2715 . -mat_no_unroll - uses code that does not unroll the loops in the 2716 block calculations (much slower) 2717 . -mat_block_size - size of the blocks to use 2718 2719 Level: intermediate 2720 2721 Notes: 2722 The number of rows and columns must be divisible by blocksize. 2723 2724 If the nnz parameter is given then the nz parameter is ignored 2725 2726 A nonzero block is any block that as 1 or more nonzeros in it 2727 2728 The block AIJ format is fully compatible with standard Fortran 77 2729 storage. That is, the stored row and column indices can begin at 2730 either one (as in Fortran) or zero. See the users' manual for details. 2731 2732 Specify the preallocated storage with either nz or nnz (not both). 2733 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2734 allocation. For additional details, see the users manual chapter on 2735 matrices. 2736 2737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2738 @*/ 2739 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2740 { 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2745 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2746 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2747 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2753 /*@C 2754 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2755 per row in the matrix. For good matrix assembly performance the 2756 user should preallocate the matrix storage by setting the parameter nz 2757 (or the array nnz). By setting these parameters accurately, performance 2758 during matrix assembly can be increased by more than a factor of 50. 2759 2760 Collective on MPI_Comm 2761 2762 Input Parameters: 2763 + A - the matrix 2764 . bs - size of block 2765 . nz - number of block nonzeros per block row (same for all rows) 2766 - nnz - array containing the number of block nonzeros in the various block rows 2767 (possibly different for each block row) or PETSC_NULL 2768 2769 Options Database Keys: 2770 . -mat_no_unroll - uses code that does not unroll the loops in the 2771 block calculations (much slower) 2772 . -mat_block_size - size of the blocks to use 2773 2774 Level: intermediate 2775 2776 Notes: 2777 If the nnz parameter is given then the nz parameter is ignored 2778 2779 You can call MatGetInfo() to get information on how effective the preallocation was; 2780 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2781 You can also run with the option -info and look for messages with the string 2782 malloc in them to see if additional memory allocation was needed. 2783 2784 The block AIJ format is fully compatible with standard Fortran 77 2785 storage. That is, the stored row and column indices can begin at 2786 either one (as in Fortran) or zero. See the users' manual for details. 2787 2788 Specify the preallocated storage with either nz or nnz (not both). 2789 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2790 allocation. For additional details, see the users manual chapter on 2791 matrices. 2792 2793 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 2794 @*/ 2795 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2796 { 2797 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2798 2799 PetscFunctionBegin; 2800 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2801 if (f) { 2802 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2803 } 2804 PetscFunctionReturn(0); 2805 } 2806 2807 #undef __FUNCT__ 2808 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2809 /*@ 2810 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2811 (upper triangular entries in CSR format) provided by the user. 2812 2813 Collective on MPI_Comm 2814 2815 Input Parameters: 2816 + comm - must be an MPI communicator of size 1 2817 . bs - size of block 2818 . m - number of rows 2819 . n - number of columns 2820 . i - row indices 2821 . j - column indices 2822 - a - matrix values 2823 2824 Output Parameter: 2825 . mat - the matrix 2826 2827 Level: intermediate 2828 2829 Notes: 2830 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2831 once the matrix is destroyed 2832 2833 You cannot set new nonzero locations into this matrix, that will generate an error. 2834 2835 The i and j indices are 0 based 2836 2837 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2838 2839 @*/ 2840 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2841 { 2842 PetscErrorCode ierr; 2843 PetscInt ii; 2844 Mat_SeqBAIJ *baij; 2845 2846 PetscFunctionBegin; 2847 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2848 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2849 2850 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2851 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2852 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2853 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2854 baij = (Mat_SeqBAIJ*)(*mat)->data; 2855 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2856 2857 baij->i = i; 2858 baij->j = j; 2859 baij->a = a; 2860 baij->singlemalloc = PETSC_FALSE; 2861 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2862 baij->free_a = PETSC_FALSE; 2863 baij->free_ij = PETSC_FALSE; 2864 2865 for (ii=0; ii<m; ii++) { 2866 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2867 #if defined(PETSC_USE_DEBUG) 2868 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2869 #endif 2870 } 2871 #if defined(PETSC_USE_DEBUG) 2872 for (ii=0; ii<baij->i[m]; ii++) { 2873 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2874 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2875 } 2876 #endif 2877 2878 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 PetscFunctionReturn(0); 2881 } 2882