1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@C 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr,(*f)(Mat); 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 29 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 32 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 33 if (f) { 34 ierr = (*f)(mat);CHKERRQ(ierr); 35 } else { 36 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 EXTERN_C_BEGIN 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 44 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 45 { 46 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 47 PetscErrorCode ierr; 48 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 49 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 50 51 PetscFunctionBegin; 52 if (a->idiagvalid) PetscFunctionReturn(0); 53 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 54 diag_offset = a->diag; 55 if (!a->idiag) { 56 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 57 } 58 diag = a->idiag; 59 mdiag = a->idiag+bs*bs*mbs; 60 /* factor and invert each block */ 61 switch (bs){ 62 case 2: 63 for (i=0; i<mbs; i++) { 64 odiag = v + 4*diag_offset[i]; 65 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 66 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 67 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 68 diag += 4; 69 mdiag += 4; 70 } 71 break; 72 case 3: 73 for (i=0; i<mbs; i++) { 74 odiag = v + 9*diag_offset[i]; 75 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 76 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 77 diag[8] = odiag[8]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 80 mdiag[8] = odiag[8]; 81 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 82 diag += 9; 83 mdiag += 9; 84 } 85 break; 86 case 4: 87 for (i=0; i<mbs; i++) { 88 odiag = v + 16*diag_offset[i]; 89 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 90 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 91 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 92 diag += 16; 93 mdiag += 16; 94 } 95 break; 96 case 5: 97 for (i=0; i<mbs; i++) { 98 odiag = v + 25*diag_offset[i]; 99 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 100 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 101 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 102 diag += 25; 103 mdiag += 25; 104 } 105 break; 106 default: 107 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 108 } 109 a->idiagvalid = PETSC_TRUE; 110 PetscFunctionReturn(0); 111 } 112 EXTERN_C_END 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 116 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 117 { 118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 119 PetscScalar *x,x1,x2,s1,s2; 120 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 121 PetscErrorCode ierr; 122 PetscInt m = a->mbs,i,i2,nz,idx; 123 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 124 125 PetscFunctionBegin; 126 its = its*lits; 127 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 128 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 129 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 130 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 131 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 132 133 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 134 135 diag = a->diag; 136 idiag = a->idiag; 137 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 138 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 139 140 if (flag & SOR_ZERO_INITIAL_GUESS) { 141 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 142 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 143 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 144 i2 = 2; 145 idiag += 4; 146 for (i=1; i<m; i++) { 147 v = aa + 4*ai[i]; 148 vi = aj + ai[i]; 149 nz = diag[i] - ai[i]; 150 s1 = b[i2]; s2 = b[i2+1]; 151 while (nz--) { 152 idx = 2*(*vi++); 153 x1 = x[idx]; x2 = x[1+idx]; 154 s1 -= v[0]*x1 + v[2]*x2; 155 s2 -= v[1]*x1 + v[3]*x2; 156 v += 4; 157 } 158 x[i2] = idiag[0]*s1 + idiag[2]*s2; 159 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 160 idiag += 4; 161 i2 += 2; 162 } 163 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 164 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 165 } 166 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 167 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 168 i2 = 0; 169 mdiag = a->idiag+4*a->mbs; 170 for (i=0; i<m; i++) { 171 x1 = x[i2]; x2 = x[i2+1]; 172 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 173 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 174 mdiag += 4; 175 i2 += 2; 176 } 177 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 178 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 179 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 180 } 181 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 182 idiag = a->idiag+4*a->mbs - 4; 183 i2 = 2*m - 2; 184 x1 = x[i2]; x2 = x[i2+1]; 185 x[i2] = idiag[0]*x1 + idiag[2]*x2; 186 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 187 idiag -= 4; 188 i2 -= 2; 189 for (i=m-2; i>=0; i--) { 190 v = aa + 4*(diag[i]+1); 191 vi = aj + diag[i] + 1; 192 nz = ai[i+1] - diag[i] - 1; 193 s1 = x[i2]; s2 = x[i2+1]; 194 while (nz--) { 195 idx = 2*(*vi++); 196 x1 = x[idx]; x2 = x[1+idx]; 197 s1 -= v[0]*x1 + v[2]*x2; 198 s2 -= v[1]*x1 + v[3]*x2; 199 v += 4; 200 } 201 x[i2] = idiag[0]*s1 + idiag[2]*s2; 202 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 203 idiag -= 4; 204 i2 -= 2; 205 } 206 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 207 } 208 } else { 209 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 210 } 211 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 212 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 #undef __FUNCT__ 217 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 218 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 219 { 220 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 221 PetscScalar *x,x1,x2,x3,s1,s2,s3; 222 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 223 PetscErrorCode ierr; 224 PetscInt m = a->mbs,i,i2,nz,idx; 225 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 226 227 PetscFunctionBegin; 228 its = its*lits; 229 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 233 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234 235 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 246 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 247 i2 = 3; 248 idiag += 9; 249 for (i=1; i<m; i++) { 250 v = aa + 9*ai[i]; 251 vi = aj + ai[i]; 252 nz = diag[i] - ai[i]; 253 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 254 while (nz--) { 255 idx = 3*(*vi++); 256 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 257 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 258 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 259 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 260 v += 9; 261 } 262 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 263 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 264 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 265 idiag += 9; 266 i2 += 3; 267 } 268 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 269 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 270 } 271 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 272 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 273 i2 = 0; 274 mdiag = a->idiag+9*a->mbs; 275 for (i=0; i<m; i++) { 276 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 277 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 278 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 279 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 280 mdiag += 9; 281 i2 += 3; 282 } 283 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 284 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 286 } 287 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 288 idiag = a->idiag+9*a->mbs - 9; 289 i2 = 3*m - 3; 290 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 291 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 292 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 293 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 294 idiag -= 9; 295 i2 -= 3; 296 for (i=m-2; i>=0; i--) { 297 v = aa + 9*(diag[i]+1); 298 vi = aj + diag[i] + 1; 299 nz = ai[i+1] - diag[i] - 1; 300 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 301 while (nz--) { 302 idx = 3*(*vi++); 303 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 304 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 305 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 306 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 307 v += 9; 308 } 309 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 310 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 311 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 312 idiag -= 9; 313 i2 -= 3; 314 } 315 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 316 } 317 } else { 318 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 319 } 320 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 321 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 322 PetscFunctionReturn(0); 323 } 324 325 #undef __FUNCT__ 326 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 327 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 328 { 329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 330 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 331 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 332 PetscErrorCode ierr; 333 PetscInt m = a->mbs,i,i2,nz,idx; 334 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 335 336 PetscFunctionBegin; 337 its = its*lits; 338 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 339 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 340 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 341 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 342 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 343 344 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 345 346 diag = a->diag; 347 idiag = a->idiag; 348 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 349 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 350 351 if (flag & SOR_ZERO_INITIAL_GUESS) { 352 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 353 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 354 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 355 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 356 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 357 i2 = 4; 358 idiag += 16; 359 for (i=1; i<m; i++) { 360 v = aa + 16*ai[i]; 361 vi = aj + ai[i]; 362 nz = diag[i] - ai[i]; 363 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 364 while (nz--) { 365 idx = 4*(*vi++); 366 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 367 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 368 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 369 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 370 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 371 v += 16; 372 } 373 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 374 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 375 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 376 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 377 idiag += 16; 378 i2 += 4; 379 } 380 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 381 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 382 } 383 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 384 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 385 i2 = 0; 386 mdiag = a->idiag+16*a->mbs; 387 for (i=0; i<m; i++) { 388 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 389 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 390 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 391 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 392 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 393 mdiag += 16; 394 i2 += 4; 395 } 396 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 397 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 398 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 399 } 400 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 401 idiag = a->idiag+16*a->mbs - 16; 402 i2 = 4*m - 4; 403 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 404 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 405 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 406 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 407 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 408 idiag -= 16; 409 i2 -= 4; 410 for (i=m-2; i>=0; i--) { 411 v = aa + 16*(diag[i]+1); 412 vi = aj + diag[i] + 1; 413 nz = ai[i+1] - diag[i] - 1; 414 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 415 while (nz--) { 416 idx = 4*(*vi++); 417 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 418 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 419 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 420 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 421 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 422 v += 16; 423 } 424 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 425 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 426 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 427 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 428 idiag -= 16; 429 i2 -= 4; 430 } 431 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 432 } 433 } else { 434 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 435 } 436 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 437 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 443 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 444 { 445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 446 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 447 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 448 PetscErrorCode ierr; 449 PetscInt m = a->mbs,i,i2,nz,idx; 450 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 451 452 PetscFunctionBegin; 453 its = its*lits; 454 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 455 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 456 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 457 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 458 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 459 460 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 461 462 diag = a->diag; 463 idiag = a->idiag; 464 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 465 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 466 467 if (flag & SOR_ZERO_INITIAL_GUESS) { 468 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 469 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 470 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 471 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 472 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 473 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 474 i2 = 5; 475 idiag += 25; 476 for (i=1; i<m; i++) { 477 v = aa + 25*ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 484 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 485 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 486 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 487 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 488 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 489 v += 25; 490 } 491 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 492 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 493 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 494 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 495 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 496 idiag += 25; 497 i2 += 5; 498 } 499 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 500 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 501 } 502 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 503 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 504 i2 = 0; 505 mdiag = a->idiag+25*a->mbs; 506 for (i=0; i<m; i++) { 507 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 508 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 509 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 510 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 511 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 512 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 513 mdiag += 25; 514 i2 += 5; 515 } 516 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 517 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 518 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 519 } 520 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 521 idiag = a->idiag+25*a->mbs - 25; 522 i2 = 5*m - 5; 523 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 524 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 525 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 526 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 527 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 528 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 529 idiag -= 25; 530 i2 -= 5; 531 for (i=m-2; i>=0; i--) { 532 v = aa + 25*(diag[i]+1); 533 vi = aj + diag[i] + 1; 534 nz = ai[i+1] - diag[i] - 1; 535 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 536 while (nz--) { 537 idx = 5*(*vi++); 538 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 539 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 540 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 541 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 542 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 543 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 544 v += 25; 545 } 546 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 547 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 548 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 549 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 550 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 551 idiag -= 25; 552 i2 -= 5; 553 } 554 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 555 } 556 } else { 557 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 558 } 559 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 560 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 561 PetscFunctionReturn(0); 562 } 563 564 /* 565 Special version for Fun3d sequential benchmark 566 */ 567 #if defined(PETSC_HAVE_FORTRAN_CAPS) 568 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 569 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 570 #define matsetvaluesblocked4_ matsetvaluesblocked4 571 #endif 572 573 EXTERN_C_BEGIN 574 #undef __FUNCT__ 575 #define __FUNCT__ "matsetvaluesblocked4_" 576 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 577 { 578 Mat A = *AA; 579 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 580 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 581 PetscInt *ai=a->i,*ailen=a->ilen; 582 PetscInt *aj=a->j,stepval; 583 const PetscScalar *value = v; 584 MatScalar *ap,*aa = a->a,*bap; 585 586 PetscFunctionBegin; 587 stepval = (n-1)*4; 588 for (k=0; k<m; k++) { /* loop over added rows */ 589 row = im[k]; 590 rp = aj + ai[row]; 591 ap = aa + 16*ai[row]; 592 nrow = ailen[row]; 593 low = 0; 594 for (l=0; l<n; l++) { /* loop over added columns */ 595 col = in[l]; 596 value = v + k*(stepval+4)*4 + l*4; 597 low = 0; high = nrow; 598 while (high-low > 7) { 599 t = (low+high)/2; 600 if (rp[t] > col) high = t; 601 else low = t; 602 } 603 for (i=low; i<high; i++) { 604 if (rp[i] > col) break; 605 if (rp[i] == col) { 606 bap = ap + 16*i; 607 for (ii=0; ii<4; ii++,value+=stepval) { 608 for (jj=ii; jj<16; jj+=4) { 609 bap[jj] += *value++; 610 } 611 } 612 goto noinsert2; 613 } 614 } 615 N = nrow++ - 1; 616 /* shift up all the later entries in this row */ 617 for (ii=N; ii>=i; ii--) { 618 rp[ii+1] = rp[ii]; 619 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 620 } 621 if (N >= i) { 622 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 623 } 624 rp[i] = col; 625 bap = ap + 16*i; 626 for (ii=0; ii<4; ii++,value+=stepval) { 627 for (jj=ii; jj<16; jj+=4) { 628 bap[jj] = *value++; 629 } 630 } 631 noinsert2:; 632 low = i; 633 } 634 ailen[row] = nrow; 635 } 636 } 637 EXTERN_C_END 638 639 #if defined(PETSC_HAVE_FORTRAN_CAPS) 640 #define matsetvalues4_ MATSETVALUES4 641 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 642 #define matsetvalues4_ matsetvalues4 643 #endif 644 645 EXTERN_C_BEGIN 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatSetValues4_" 648 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 649 { 650 Mat A = *AA; 651 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 652 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 653 PetscInt *ai=a->i,*ailen=a->ilen; 654 PetscInt *aj=a->j,brow,bcol; 655 PetscInt ridx,cidx; 656 MatScalar *ap,value,*aa=a->a,*bap; 657 658 PetscFunctionBegin; 659 for (k=0; k<m; k++) { /* loop over added rows */ 660 row = im[k]; brow = row/4; 661 rp = aj + ai[brow]; 662 ap = aa + 16*ai[brow]; 663 nrow = ailen[brow]; 664 low = 0; 665 for (l=0; l<n; l++) { /* loop over added columns */ 666 col = in[l]; bcol = col/4; 667 ridx = row % 4; cidx = col % 4; 668 value = v[l + k*n]; 669 low = 0; high = nrow; 670 while (high-low > 7) { 671 t = (low+high)/2; 672 if (rp[t] > bcol) high = t; 673 else low = t; 674 } 675 for (i=low; i<high; i++) { 676 if (rp[i] > bcol) break; 677 if (rp[i] == bcol) { 678 bap = ap + 16*i + 4*cidx + ridx; 679 *bap += value; 680 goto noinsert1; 681 } 682 } 683 N = nrow++ - 1; 684 /* shift up all the later entries in this row */ 685 for (ii=N; ii>=i; ii--) { 686 rp[ii+1] = rp[ii]; 687 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 688 } 689 if (N>=i) { 690 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 691 } 692 rp[i] = bcol; 693 ap[16*i + 4*cidx + ridx] = value; 694 noinsert1:; 695 low = i; 696 } 697 ailen[brow] = nrow; 698 } 699 } 700 EXTERN_C_END 701 702 /* UGLY, ugly, ugly 703 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 704 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 705 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 706 converts the entries into single precision and then calls ..._MatScalar() to put them 707 into the single precision data structures. 708 */ 709 #if defined(PETSC_USE_MAT_SINGLE) 710 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 711 #else 712 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 713 #endif 714 715 #define CHUNKSIZE 10 716 717 /* 718 Checks for missing diagonals 719 */ 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 722 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 723 { 724 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 725 PetscErrorCode ierr; 726 PetscInt *diag,*jj = a->j,i; 727 728 PetscFunctionBegin; 729 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 730 diag = a->diag; 731 for (i=0; i<a->mbs; i++) { 732 if (jj[diag[i]] != i) { 733 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 734 } 735 } 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 741 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 742 { 743 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 744 PetscErrorCode ierr; 745 PetscInt i,j,*diag,m = a->mbs; 746 747 PetscFunctionBegin; 748 if (a->diag) PetscFunctionReturn(0); 749 750 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 751 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 752 for (i=0; i<m; i++) { 753 diag[i] = a->i[i+1]; 754 for (j=a->i[i]; j<a->i[i+1]; j++) { 755 if (a->j[j] == i) { 756 diag[i] = j; 757 break; 758 } 759 } 760 } 761 a->diag = diag; 762 PetscFunctionReturn(0); 763 } 764 765 766 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 770 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 771 { 772 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 773 PetscErrorCode ierr; 774 PetscInt n = a->mbs,i; 775 776 PetscFunctionBegin; 777 *nn = n; 778 if (!ia) PetscFunctionReturn(0); 779 if (symmetric) { 780 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 781 } else if (oshift == 1) { 782 /* temporarily add 1 to i and j indices */ 783 PetscInt nz = a->i[n]; 784 for (i=0; i<nz; i++) a->j[i]++; 785 for (i=0; i<n+1; i++) a->i[i]++; 786 *ia = a->i; *ja = a->j; 787 } else { 788 *ia = a->i; *ja = a->j; 789 } 790 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 796 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 797 { 798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 799 PetscErrorCode ierr; 800 PetscInt i,n = a->mbs; 801 802 PetscFunctionBegin; 803 if (!ia) PetscFunctionReturn(0); 804 if (symmetric) { 805 ierr = PetscFree(*ia);CHKERRQ(ierr); 806 ierr = PetscFree(*ja);CHKERRQ(ierr); 807 } else if (oshift == 1) { 808 PetscInt nz = a->i[n]-1; 809 for (i=0; i<nz; i++) a->j[i]--; 810 for (i=0; i<n+1; i++) a->i[i]--; 811 } 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "MatDestroy_SeqBAIJ" 817 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 818 { 819 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 820 PetscErrorCode ierr; 821 822 PetscFunctionBegin; 823 #if defined(PETSC_USE_LOG) 824 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 825 #endif 826 ierr = PetscFree(a->a);CHKERRQ(ierr); 827 if (!a->singlemalloc) { 828 ierr = PetscFree(a->i);CHKERRQ(ierr); 829 ierr = PetscFree(a->j);CHKERRQ(ierr); 830 } 831 if (a->row) { 832 ierr = ISDestroy(a->row);CHKERRQ(ierr); 833 } 834 if (a->col) { 835 ierr = ISDestroy(a->col);CHKERRQ(ierr); 836 } 837 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 838 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 839 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 840 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 841 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 842 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 843 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 844 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 845 #if defined(PETSC_USE_MAT_SINGLE) 846 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 847 #endif 848 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 849 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 850 851 ierr = PetscFree(a);CHKERRQ(ierr); 852 853 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 854 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 856 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 857 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatSetOption_SeqBAIJ" 865 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 866 { 867 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 switch (op) { 872 case MAT_ROW_ORIENTED: 873 a->roworiented = PETSC_TRUE; 874 break; 875 case MAT_COLUMN_ORIENTED: 876 a->roworiented = PETSC_FALSE; 877 break; 878 case MAT_COLUMNS_SORTED: 879 a->sorted = PETSC_TRUE; 880 break; 881 case MAT_COLUMNS_UNSORTED: 882 a->sorted = PETSC_FALSE; 883 break; 884 case MAT_KEEP_ZEROED_ROWS: 885 a->keepzeroedrows = PETSC_TRUE; 886 break; 887 case MAT_NO_NEW_NONZERO_LOCATIONS: 888 a->nonew = 1; 889 break; 890 case MAT_NEW_NONZERO_LOCATION_ERR: 891 a->nonew = -1; 892 break; 893 case MAT_NEW_NONZERO_ALLOCATION_ERR: 894 a->nonew = -2; 895 break; 896 case MAT_YES_NEW_NONZERO_LOCATIONS: 897 a->nonew = 0; 898 break; 899 case MAT_ROWS_SORTED: 900 case MAT_ROWS_UNSORTED: 901 case MAT_YES_NEW_DIAGONALS: 902 case MAT_IGNORE_OFF_PROC_ENTRIES: 903 case MAT_USE_HASH_TABLE: 904 ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 905 break; 906 case MAT_NO_NEW_DIAGONALS: 907 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 908 case MAT_SYMMETRIC: 909 case MAT_STRUCTURALLY_SYMMETRIC: 910 case MAT_NOT_SYMMETRIC: 911 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 912 case MAT_HERMITIAN: 913 case MAT_NOT_HERMITIAN: 914 case MAT_SYMMETRY_ETERNAL: 915 case MAT_NOT_SYMMETRY_ETERNAL: 916 break; 917 default: 918 SETERRQ(PETSC_ERR_SUP,"unknown option"); 919 } 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNCT__ 924 #define __FUNCT__ "MatGetRow_SeqBAIJ" 925 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 926 { 927 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 928 PetscErrorCode ierr; 929 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 930 MatScalar *aa,*aa_i; 931 PetscScalar *v_i; 932 933 PetscFunctionBegin; 934 bs = A->bs; 935 ai = a->i; 936 aj = a->j; 937 aa = a->a; 938 bs2 = a->bs2; 939 940 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 941 942 bn = row/bs; /* Block number */ 943 bp = row % bs; /* Block Position */ 944 M = ai[bn+1] - ai[bn]; 945 *nz = bs*M; 946 947 if (v) { 948 *v = 0; 949 if (*nz) { 950 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 951 for (i=0; i<M; i++) { /* for each block in the block row */ 952 v_i = *v + i*bs; 953 aa_i = aa + bs2*(ai[bn] + i); 954 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 955 } 956 } 957 } 958 959 if (idx) { 960 *idx = 0; 961 if (*nz) { 962 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 963 for (i=0; i<M; i++) { /* for each block in the block row */ 964 idx_i = *idx + i*bs; 965 itmp = bs*aj[ai[bn] + i]; 966 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 967 } 968 } 969 } 970 PetscFunctionReturn(0); 971 } 972 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 975 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 976 { 977 PetscErrorCode ierr; 978 979 PetscFunctionBegin; 980 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 981 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatTranspose_SeqBAIJ" 987 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 988 { 989 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 990 Mat C; 991 PetscErrorCode ierr; 992 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 993 PetscInt *rows,*cols,bs2=a->bs2; 994 PetscScalar *array; 995 996 PetscFunctionBegin; 997 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 998 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 999 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1000 1001 #if defined(PETSC_USE_MAT_SINGLE) 1002 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1003 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1004 #else 1005 array = a->a; 1006 #endif 1007 1008 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1009 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 1010 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1011 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1012 ierr = PetscFree(col);CHKERRQ(ierr); 1013 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1014 cols = rows + bs; 1015 for (i=0; i<mbs; i++) { 1016 cols[0] = i*bs; 1017 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1018 len = ai[i+1] - ai[i]; 1019 for (j=0; j<len; j++) { 1020 rows[0] = (*aj++)*bs; 1021 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1022 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1023 array += bs2; 1024 } 1025 } 1026 ierr = PetscFree(rows);CHKERRQ(ierr); 1027 #if defined(PETSC_USE_MAT_SINGLE) 1028 ierr = PetscFree(array); 1029 #endif 1030 1031 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1032 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1033 1034 if (B) { 1035 *B = C; 1036 } else { 1037 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1038 } 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1044 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1045 { 1046 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1047 PetscErrorCode ierr; 1048 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1049 int fd; 1050 PetscScalar *aa; 1051 FILE *file; 1052 1053 PetscFunctionBegin; 1054 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1055 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1056 col_lens[0] = MAT_FILE_COOKIE; 1057 1058 col_lens[1] = A->m; 1059 col_lens[2] = A->n; 1060 col_lens[3] = a->nz*bs2; 1061 1062 /* store lengths of each row and write (including header) to file */ 1063 count = 0; 1064 for (i=0; i<a->mbs; i++) { 1065 for (j=0; j<bs; j++) { 1066 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1067 } 1068 } 1069 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1070 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1071 1072 /* store column indices (zero start index) */ 1073 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1074 count = 0; 1075 for (i=0; i<a->mbs; i++) { 1076 for (j=0; j<bs; j++) { 1077 for (k=a->i[i]; k<a->i[i+1]; k++) { 1078 for (l=0; l<bs; l++) { 1079 jj[count++] = bs*a->j[k] + l; 1080 } 1081 } 1082 } 1083 } 1084 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1085 ierr = PetscFree(jj);CHKERRQ(ierr); 1086 1087 /* store nonzero values */ 1088 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1089 count = 0; 1090 for (i=0; i<a->mbs; i++) { 1091 for (j=0; j<bs; j++) { 1092 for (k=a->i[i]; k<a->i[i+1]; k++) { 1093 for (l=0; l<bs; l++) { 1094 aa[count++] = a->a[bs2*k + l*bs + j]; 1095 } 1096 } 1097 } 1098 } 1099 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1100 ierr = PetscFree(aa);CHKERRQ(ierr); 1101 1102 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1103 if (file) { 1104 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1111 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1112 { 1113 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1114 PetscErrorCode ierr; 1115 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1116 PetscViewerFormat format; 1117 1118 PetscFunctionBegin; 1119 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1120 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1121 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1122 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1123 Mat aij; 1124 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1125 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1126 ierr = MatDestroy(aij);CHKERRQ(ierr); 1127 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1128 PetscFunctionReturn(0); 1129 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1130 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1131 for (i=0; i<a->mbs; i++) { 1132 for (j=0; j<bs; j++) { 1133 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1134 for (k=a->i[i]; k<a->i[i+1]; k++) { 1135 for (l=0; l<bs; l++) { 1136 #if defined(PETSC_USE_COMPLEX) 1137 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1138 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1139 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1140 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1141 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1142 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1143 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1144 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1145 } 1146 #else 1147 if (a->a[bs2*k + l*bs + j] != 0.0) { 1148 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1149 } 1150 #endif 1151 } 1152 } 1153 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1154 } 1155 } 1156 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1157 } else { 1158 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1159 for (i=0; i<a->mbs; i++) { 1160 for (j=0; j<bs; j++) { 1161 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1162 for (k=a->i[i]; k<a->i[i+1]; k++) { 1163 for (l=0; l<bs; l++) { 1164 #if defined(PETSC_USE_COMPLEX) 1165 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1166 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1167 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1168 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1169 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1170 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1171 } else { 1172 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1173 } 1174 #else 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1176 #endif 1177 } 1178 } 1179 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1180 } 1181 } 1182 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1183 } 1184 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1190 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1191 { 1192 Mat A = (Mat) Aa; 1193 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1194 PetscErrorCode ierr; 1195 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1196 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1197 MatScalar *aa; 1198 PetscViewer viewer; 1199 1200 PetscFunctionBegin; 1201 1202 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1203 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1204 1205 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1206 1207 /* loop over matrix elements drawing boxes */ 1208 color = PETSC_DRAW_BLUE; 1209 for (i=0,row=0; i<mbs; i++,row+=bs) { 1210 for (j=a->i[i]; j<a->i[i+1]; j++) { 1211 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1212 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1213 aa = a->a + j*bs2; 1214 for (k=0; k<bs; k++) { 1215 for (l=0; l<bs; l++) { 1216 if (PetscRealPart(*aa++) >= 0.) continue; 1217 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1218 } 1219 } 1220 } 1221 } 1222 color = PETSC_DRAW_CYAN; 1223 for (i=0,row=0; i<mbs; i++,row+=bs) { 1224 for (j=a->i[i]; j<a->i[i+1]; j++) { 1225 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1226 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1227 aa = a->a + j*bs2; 1228 for (k=0; k<bs; k++) { 1229 for (l=0; l<bs; l++) { 1230 if (PetscRealPart(*aa++) != 0.) continue; 1231 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1232 } 1233 } 1234 } 1235 } 1236 1237 color = PETSC_DRAW_RED; 1238 for (i=0,row=0; i<mbs; i++,row+=bs) { 1239 for (j=a->i[i]; j<a->i[i+1]; j++) { 1240 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1241 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1242 aa = a->a + j*bs2; 1243 for (k=0; k<bs; k++) { 1244 for (l=0; l<bs; l++) { 1245 if (PetscRealPart(*aa++) <= 0.) continue; 1246 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1247 } 1248 } 1249 } 1250 } 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1256 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1257 { 1258 PetscErrorCode ierr; 1259 PetscReal xl,yl,xr,yr,w,h; 1260 PetscDraw draw; 1261 PetscTruth isnull; 1262 1263 PetscFunctionBegin; 1264 1265 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1266 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1267 1268 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1269 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1270 xr += w; yr += h; xl = -w; yl = -h; 1271 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1272 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1273 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatView_SeqBAIJ" 1279 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1280 { 1281 PetscErrorCode ierr; 1282 PetscTruth iascii,isbinary,isdraw; 1283 1284 PetscFunctionBegin; 1285 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1286 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1287 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1288 if (iascii){ 1289 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1290 } else if (isbinary) { 1291 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1292 } else if (isdraw) { 1293 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1294 } else { 1295 Mat B; 1296 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1297 ierr = MatView(B,viewer);CHKERRQ(ierr); 1298 ierr = MatDestroy(B);CHKERRQ(ierr); 1299 } 1300 PetscFunctionReturn(0); 1301 } 1302 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1306 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1307 { 1308 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1309 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1310 PetscInt *ai = a->i,*ailen = a->ilen; 1311 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1312 MatScalar *ap,*aa = a->a,zero = 0.0; 1313 1314 PetscFunctionBegin; 1315 for (k=0; k<m; k++) { /* loop over rows */ 1316 row = im[k]; brow = row/bs; 1317 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1318 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1319 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1320 nrow = ailen[brow]; 1321 for (l=0; l<n; l++) { /* loop over columns */ 1322 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1323 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1324 col = in[l] ; 1325 bcol = col/bs; 1326 cidx = col%bs; 1327 ridx = row%bs; 1328 high = nrow; 1329 low = 0; /* assume unsorted */ 1330 while (high-low > 5) { 1331 t = (low+high)/2; 1332 if (rp[t] > bcol) high = t; 1333 else low = t; 1334 } 1335 for (i=low; i<high; i++) { 1336 if (rp[i] > bcol) break; 1337 if (rp[i] == bcol) { 1338 *v++ = ap[bs2*i+bs*cidx+ridx]; 1339 goto finished; 1340 } 1341 } 1342 *v++ = zero; 1343 finished:; 1344 } 1345 } 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #if defined(PETSC_USE_MAT_SINGLE) 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1352 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1353 { 1354 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1355 PetscErrorCode ierr; 1356 PetscInt i,N = m*n*b->bs2; 1357 MatScalar *vsingle; 1358 1359 PetscFunctionBegin; 1360 if (N > b->setvalueslen) { 1361 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1362 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1363 b->setvalueslen = N; 1364 } 1365 vsingle = b->setvaluescopy; 1366 for (i=0; i<N; i++) { 1367 vsingle[i] = v[i]; 1368 } 1369 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 #endif 1373 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1377 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1378 { 1379 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1380 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1381 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1382 PetscErrorCode ierr; 1383 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1384 PetscTruth roworiented=a->roworiented; 1385 const MatScalar *value = v; 1386 MatScalar *ap,*aa = a->a,*bap; 1387 1388 PetscFunctionBegin; 1389 if (roworiented) { 1390 stepval = (n-1)*bs; 1391 } else { 1392 stepval = (m-1)*bs; 1393 } 1394 for (k=0; k<m; k++) { /* loop over added rows */ 1395 row = im[k]; 1396 if (row < 0) continue; 1397 #if defined(PETSC_USE_DEBUG) 1398 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1399 #endif 1400 rp = aj + ai[row]; 1401 ap = aa + bs2*ai[row]; 1402 rmax = imax[row]; 1403 nrow = ailen[row]; 1404 low = 0; 1405 high = nrow; 1406 for (l=0; l<n; l++) { /* loop over added columns */ 1407 if (in[l] < 0) continue; 1408 #if defined(PETSC_USE_DEBUG) 1409 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1410 #endif 1411 col = in[l]; 1412 if (roworiented) { 1413 value = v + k*(stepval+bs)*bs + l*bs; 1414 } else { 1415 value = v + l*(stepval+bs)*bs + k*bs; 1416 } 1417 if (col < lastcol) low = 0; else high = nrow; 1418 lastcol = col; 1419 while (high-low > 7) { 1420 t = (low+high)/2; 1421 if (rp[t] > col) high = t; 1422 else low = t; 1423 } 1424 for (i=low; i<high; i++) { 1425 if (rp[i] > col) break; 1426 if (rp[i] == col) { 1427 bap = ap + bs2*i; 1428 if (roworiented) { 1429 if (is == ADD_VALUES) { 1430 for (ii=0; ii<bs; ii++,value+=stepval) { 1431 for (jj=ii; jj<bs2; jj+=bs) { 1432 bap[jj] += *value++; 1433 } 1434 } 1435 } else { 1436 for (ii=0; ii<bs; ii++,value+=stepval) { 1437 for (jj=ii; jj<bs2; jj+=bs) { 1438 bap[jj] = *value++; 1439 } 1440 } 1441 } 1442 } else { 1443 if (is == ADD_VALUES) { 1444 for (ii=0; ii<bs; ii++,value+=stepval) { 1445 for (jj=0; jj<bs; jj++) { 1446 *bap++ += *value++; 1447 } 1448 } 1449 } else { 1450 for (ii=0; ii<bs; ii++,value+=stepval) { 1451 for (jj=0; jj<bs; jj++) { 1452 *bap++ = *value++; 1453 } 1454 } 1455 } 1456 } 1457 goto noinsert2; 1458 } 1459 } 1460 if (nonew == 1) goto noinsert2; 1461 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1462 if (nrow >= rmax) { 1463 /* there is no extra room in row, therefore enlarge */ 1464 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1465 MatScalar *new_a; 1466 1467 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1468 1469 /* malloc new storage space */ 1470 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1471 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1472 new_j = (PetscInt*)(new_a + bs2*new_nz); 1473 new_i = new_j + new_nz; 1474 1475 /* copy over old data into new slots */ 1476 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1477 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1478 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1479 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1480 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1481 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1482 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1483 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1484 /* free up old matrix storage */ 1485 ierr = PetscFree(a->a);CHKERRQ(ierr); 1486 if (!a->singlemalloc) { 1487 ierr = PetscFree(a->i);CHKERRQ(ierr); 1488 ierr = PetscFree(a->j);CHKERRQ(ierr); 1489 } 1490 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1491 a->singlemalloc = PETSC_TRUE; 1492 1493 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1494 rmax = imax[row] = imax[row] + CHUNKSIZE; 1495 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1496 a->maxnz += bs2*CHUNKSIZE; 1497 a->reallocs++; 1498 a->nz++; 1499 } 1500 N = nrow++ - 1; 1501 /* shift up all the later entries in this row */ 1502 for (ii=N; ii>=i; ii--) { 1503 rp[ii+1] = rp[ii]; 1504 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1505 } 1506 if (N >= i) { 1507 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1508 } 1509 rp[i] = col; 1510 bap = ap + bs2*i; 1511 if (roworiented) { 1512 for (ii=0; ii<bs; ii++,value+=stepval) { 1513 for (jj=ii; jj<bs2; jj+=bs) { 1514 bap[jj] = *value++; 1515 } 1516 } 1517 } else { 1518 for (ii=0; ii<bs; ii++,value+=stepval) { 1519 for (jj=0; jj<bs; jj++) { 1520 *bap++ = *value++; 1521 } 1522 } 1523 } 1524 noinsert2:; 1525 low = i; 1526 } 1527 ailen[row] = nrow; 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1534 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1535 { 1536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1537 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1538 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1539 PetscErrorCode ierr; 1540 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1541 MatScalar *aa = a->a,*ap; 1542 PetscReal ratio=0.6; 1543 1544 PetscFunctionBegin; 1545 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1546 1547 if (m) rmax = ailen[0]; 1548 for (i=1; i<mbs; i++) { 1549 /* move each row back by the amount of empty slots (fshift) before it*/ 1550 fshift += imax[i-1] - ailen[i-1]; 1551 rmax = PetscMax(rmax,ailen[i]); 1552 if (fshift) { 1553 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1554 N = ailen[i]; 1555 for (j=0; j<N; j++) { 1556 ip[j-fshift] = ip[j]; 1557 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1558 } 1559 } 1560 ai[i] = ai[i-1] + ailen[i-1]; 1561 } 1562 if (mbs) { 1563 fshift += imax[mbs-1] - ailen[mbs-1]; 1564 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1565 } 1566 /* reset ilen and imax for each row */ 1567 for (i=0; i<mbs; i++) { 1568 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1569 } 1570 a->nz = ai[mbs]; 1571 1572 /* diagonals may have moved, so kill the diagonal pointers */ 1573 a->idiagvalid = PETSC_FALSE; 1574 if (fshift && a->diag) { 1575 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1576 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1577 a->diag = 0; 1578 } 1579 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1580 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1581 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1582 a->reallocs = 0; 1583 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1584 1585 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1586 if (a->compressedrow.use){ 1587 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1588 } 1589 1590 A->same_nonzero = PETSC_TRUE; 1591 PetscFunctionReturn(0); 1592 } 1593 1594 /* 1595 This function returns an array of flags which indicate the locations of contiguous 1596 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1597 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1598 Assume: sizes should be long enough to hold all the values. 1599 */ 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1602 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1603 { 1604 PetscInt i,j,k,row; 1605 PetscTruth flg; 1606 1607 PetscFunctionBegin; 1608 for (i=0,j=0; i<n; j++) { 1609 row = idx[i]; 1610 if (row%bs!=0) { /* Not the begining of a block */ 1611 sizes[j] = 1; 1612 i++; 1613 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1614 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1615 i++; 1616 } else { /* Begining of the block, so check if the complete block exists */ 1617 flg = PETSC_TRUE; 1618 for (k=1; k<bs; k++) { 1619 if (row+k != idx[i+k]) { /* break in the block */ 1620 flg = PETSC_FALSE; 1621 break; 1622 } 1623 } 1624 if (flg) { /* No break in the bs */ 1625 sizes[j] = bs; 1626 i+= bs; 1627 } else { 1628 sizes[j] = 1; 1629 i++; 1630 } 1631 } 1632 } 1633 *bs_max = j; 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1639 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1640 { 1641 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1642 PetscErrorCode ierr; 1643 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1644 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1645 PetscScalar zero = 0.0; 1646 MatScalar *aa; 1647 1648 PetscFunctionBegin; 1649 /* Make a copy of the IS and sort it */ 1650 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1651 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1652 1653 /* allocate memory for rows,sizes */ 1654 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1655 sizes = rows + is_n; 1656 1657 /* copy IS values to rows, and sort them */ 1658 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1659 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1660 if (baij->keepzeroedrows) { 1661 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1662 bs_max = is_n; 1663 A->same_nonzero = PETSC_TRUE; 1664 } else { 1665 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1666 A->same_nonzero = PETSC_FALSE; 1667 } 1668 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1669 1670 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1671 row = rows[j]; 1672 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1673 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1674 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1675 if (sizes[i] == bs && !baij->keepzeroedrows) { 1676 if (diag) { 1677 if (baij->ilen[row/bs] > 0) { 1678 baij->ilen[row/bs] = 1; 1679 baij->j[baij->i[row/bs]] = row/bs; 1680 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1681 } 1682 /* Now insert all the diagonal values for this bs */ 1683 for (k=0; k<bs; k++) { 1684 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1685 } 1686 } else { /* (!diag) */ 1687 baij->ilen[row/bs] = 0; 1688 } /* end (!diag) */ 1689 } else { /* (sizes[i] != bs) */ 1690 #if defined (PETSC_USE_DEBUG) 1691 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1692 #endif 1693 for (k=0; k<count; k++) { 1694 aa[0] = zero; 1695 aa += bs; 1696 } 1697 if (diag) { 1698 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1699 } 1700 } 1701 } 1702 1703 ierr = PetscFree(rows);CHKERRQ(ierr); 1704 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1710 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1711 { 1712 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1713 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1714 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1715 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1716 PetscErrorCode ierr; 1717 PetscInt ridx,cidx,bs2=a->bs2; 1718 PetscTruth roworiented=a->roworiented; 1719 MatScalar *ap,value,*aa=a->a,*bap; 1720 1721 PetscFunctionBegin; 1722 for (k=0; k<m; k++) { /* loop over added rows */ 1723 row = im[k]; brow = row/bs; 1724 if (row < 0) continue; 1725 #if defined(PETSC_USE_DEBUG) 1726 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1727 #endif 1728 rp = aj + ai[brow]; 1729 ap = aa + bs2*ai[brow]; 1730 rmax = imax[brow]; 1731 nrow = ailen[brow]; 1732 low = 0; 1733 high = nrow; 1734 for (l=0; l<n; l++) { /* loop over added columns */ 1735 if (in[l] < 0) continue; 1736 #if defined(PETSC_USE_DEBUG) 1737 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1738 #endif 1739 col = in[l]; bcol = col/bs; 1740 ridx = row % bs; cidx = col % bs; 1741 if (roworiented) { 1742 value = v[l + k*n]; 1743 } else { 1744 value = v[k + l*m]; 1745 } 1746 if (col < lastcol) low = 0; else high = nrow; 1747 lastcol = col; 1748 while (high-low > 7) { 1749 t = (low+high)/2; 1750 if (rp[t] > bcol) high = t; 1751 else low = t; 1752 } 1753 for (i=low; i<high; i++) { 1754 if (rp[i] > bcol) break; 1755 if (rp[i] == bcol) { 1756 bap = ap + bs2*i + bs*cidx + ridx; 1757 if (is == ADD_VALUES) *bap += value; 1758 else *bap = value; 1759 goto noinsert1; 1760 } 1761 } 1762 if (nonew == 1) goto noinsert1; 1763 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1764 if (nrow >= rmax) { 1765 /* there is no extra room in row, therefore enlarge */ 1766 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1767 MatScalar *new_a; 1768 1769 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1770 1771 /* Malloc new storage space */ 1772 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1773 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1774 new_j = (PetscInt*)(new_a + bs2*new_nz); 1775 new_i = new_j + new_nz; 1776 1777 /* copy over old data into new slots */ 1778 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1779 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1780 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1781 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1782 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1783 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1784 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1785 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1786 /* free up old matrix storage */ 1787 ierr = PetscFree(a->a);CHKERRQ(ierr); 1788 if (!a->singlemalloc) { 1789 ierr = PetscFree(a->i);CHKERRQ(ierr); 1790 ierr = PetscFree(a->j);CHKERRQ(ierr); 1791 } 1792 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1793 a->singlemalloc = PETSC_TRUE; 1794 1795 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1796 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1797 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1798 a->maxnz += bs2*CHUNKSIZE; 1799 a->reallocs++; 1800 a->nz++; 1801 } 1802 N = nrow++ - 1; 1803 /* shift up all the later entries in this row */ 1804 for (ii=N; ii>=i; ii--) { 1805 rp[ii+1] = rp[ii]; 1806 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1807 } 1808 if (N>=i) { 1809 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1810 } 1811 rp[i] = bcol; 1812 ap[bs2*i + bs*cidx + ridx] = value; 1813 noinsert1:; 1814 low = i; 1815 } 1816 ailen[brow] = nrow; 1817 } 1818 A->same_nonzero = PETSC_FALSE; 1819 PetscFunctionReturn(0); 1820 } 1821 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1825 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1826 { 1827 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1828 Mat outA; 1829 PetscErrorCode ierr; 1830 PetscTruth row_identity,col_identity; 1831 1832 PetscFunctionBegin; 1833 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1834 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1835 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1836 if (!row_identity || !col_identity) { 1837 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1838 } 1839 1840 outA = inA; 1841 inA->factor = FACTOR_LU; 1842 1843 if (!a->diag) { 1844 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1845 } 1846 1847 a->row = row; 1848 a->col = col; 1849 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1850 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1851 1852 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1853 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1854 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1855 1856 /* 1857 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1858 for ILU(0) factorization with natural ordering 1859 */ 1860 if (inA->bs < 8) { 1861 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1862 } else { 1863 if (!a->solve_work) { 1864 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1865 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1866 } 1867 } 1868 1869 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1870 1871 PetscFunctionReturn(0); 1872 } 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1875 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1876 { 1877 static PetscTruth called = PETSC_FALSE; 1878 MPI_Comm comm = A->comm; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1883 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1884 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1885 PetscFunctionReturn(0); 1886 } 1887 1888 EXTERN_C_BEGIN 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1891 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1892 { 1893 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1894 PetscInt i,nz,nbs; 1895 1896 PetscFunctionBegin; 1897 nz = baij->maxnz/baij->bs2; 1898 nbs = baij->nbs; 1899 for (i=0; i<nz; i++) { 1900 baij->j[i] = indices[i]; 1901 } 1902 baij->nz = nz; 1903 for (i=0; i<nbs; i++) { 1904 baij->ilen[i] = baij->imax[i]; 1905 } 1906 1907 PetscFunctionReturn(0); 1908 } 1909 EXTERN_C_END 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1913 /*@ 1914 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1915 in the matrix. 1916 1917 Input Parameters: 1918 + mat - the SeqBAIJ matrix 1919 - indices - the column indices 1920 1921 Level: advanced 1922 1923 Notes: 1924 This can be called if you have precomputed the nonzero structure of the 1925 matrix and want to provide it to the matrix object to improve the performance 1926 of the MatSetValues() operation. 1927 1928 You MUST have set the correct numbers of nonzeros per row in the call to 1929 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1930 1931 MUST be called before any calls to MatSetValues(); 1932 1933 @*/ 1934 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1935 { 1936 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1937 1938 PetscFunctionBegin; 1939 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1940 PetscValidPointer(indices,2); 1941 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1942 if (f) { 1943 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1944 } else { 1945 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1952 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1953 { 1954 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1955 PetscErrorCode ierr; 1956 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1957 PetscReal atmp; 1958 PetscScalar *x,zero = 0.0; 1959 MatScalar *aa; 1960 PetscInt ncols,brow,krow,kcol; 1961 1962 PetscFunctionBegin; 1963 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1964 bs = A->bs; 1965 aa = a->a; 1966 ai = a->i; 1967 aj = a->j; 1968 mbs = a->mbs; 1969 1970 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1971 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1972 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1973 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1974 for (i=0; i<mbs; i++) { 1975 ncols = ai[1] - ai[0]; ai++; 1976 brow = bs*i; 1977 for (j=0; j<ncols; j++){ 1978 /* bcol = bs*(*aj); */ 1979 for (kcol=0; kcol<bs; kcol++){ 1980 for (krow=0; krow<bs; krow++){ 1981 atmp = PetscAbsScalar(*aa); aa++; 1982 row = brow + krow; /* row index */ 1983 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1984 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1985 } 1986 } 1987 aj++; 1988 } 1989 } 1990 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1996 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1997 { 1998 PetscErrorCode ierr; 1999 2000 PetscFunctionBegin; 2001 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2007 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2008 { 2009 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2010 PetscFunctionBegin; 2011 *array = a->a; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2017 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2018 { 2019 PetscFunctionBegin; 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #include "petscblaslapack.h" 2024 #undef __FUNCT__ 2025 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2026 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 2027 { 2028 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2029 PetscErrorCode ierr; 2030 PetscInt i,bs=Y->bs,j,bs2; 2031 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 2032 2033 PetscFunctionBegin; 2034 if (str == SAME_NONZERO_PATTERN) { 2035 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2036 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2037 if (y->xtoy && y->XtoY != X) { 2038 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2039 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2040 } 2041 if (!y->xtoy) { /* get xtoy */ 2042 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2043 y->XtoY = X; 2044 } 2045 bs2 = bs*bs; 2046 for (i=0; i<x->nz; i++) { 2047 j = 0; 2048 while (j < bs2){ 2049 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2050 j++; 2051 } 2052 } 2053 ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2054 } else { 2055 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2056 } 2057 PetscFunctionReturn(0); 2058 } 2059 2060 /* -------------------------------------------------------------------*/ 2061 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2062 MatGetRow_SeqBAIJ, 2063 MatRestoreRow_SeqBAIJ, 2064 MatMult_SeqBAIJ_N, 2065 /* 4*/ MatMultAdd_SeqBAIJ_N, 2066 MatMultTranspose_SeqBAIJ, 2067 MatMultTransposeAdd_SeqBAIJ, 2068 MatSolve_SeqBAIJ_N, 2069 0, 2070 0, 2071 /*10*/ 0, 2072 MatLUFactor_SeqBAIJ, 2073 0, 2074 0, 2075 MatTranspose_SeqBAIJ, 2076 /*15*/ MatGetInfo_SeqBAIJ, 2077 MatEqual_SeqBAIJ, 2078 MatGetDiagonal_SeqBAIJ, 2079 MatDiagonalScale_SeqBAIJ, 2080 MatNorm_SeqBAIJ, 2081 /*20*/ 0, 2082 MatAssemblyEnd_SeqBAIJ, 2083 0, 2084 MatSetOption_SeqBAIJ, 2085 MatZeroEntries_SeqBAIJ, 2086 /*25*/ MatZeroRows_SeqBAIJ, 2087 MatLUFactorSymbolic_SeqBAIJ, 2088 MatLUFactorNumeric_SeqBAIJ_N, 2089 MatCholeskyFactorSymbolic_SeqBAIJ, 2090 MatCholeskyFactorNumeric_SeqBAIJ_N, 2091 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2092 MatILUFactorSymbolic_SeqBAIJ, 2093 MatICCFactorSymbolic_SeqBAIJ, 2094 MatGetArray_SeqBAIJ, 2095 MatRestoreArray_SeqBAIJ, 2096 /*35*/ MatDuplicate_SeqBAIJ, 2097 0, 2098 0, 2099 MatILUFactor_SeqBAIJ, 2100 0, 2101 /*40*/ MatAXPY_SeqBAIJ, 2102 MatGetSubMatrices_SeqBAIJ, 2103 MatIncreaseOverlap_SeqBAIJ, 2104 MatGetValues_SeqBAIJ, 2105 0, 2106 /*45*/ MatPrintHelp_SeqBAIJ, 2107 MatScale_SeqBAIJ, 2108 0, 2109 0, 2110 0, 2111 /*50*/ 0, 2112 MatGetRowIJ_SeqBAIJ, 2113 MatRestoreRowIJ_SeqBAIJ, 2114 0, 2115 0, 2116 /*55*/ 0, 2117 0, 2118 0, 2119 0, 2120 MatSetValuesBlocked_SeqBAIJ, 2121 /*60*/ MatGetSubMatrix_SeqBAIJ, 2122 MatDestroy_SeqBAIJ, 2123 MatView_SeqBAIJ, 2124 MatGetPetscMaps_Petsc, 2125 0, 2126 /*65*/ 0, 2127 0, 2128 0, 2129 0, 2130 0, 2131 /*70*/ MatGetRowMax_SeqBAIJ, 2132 MatConvert_Basic, 2133 0, 2134 0, 2135 0, 2136 /*75*/ 0, 2137 0, 2138 0, 2139 0, 2140 0, 2141 /*80*/ 0, 2142 0, 2143 0, 2144 0, 2145 MatLoad_SeqBAIJ, 2146 /*85*/ 0, 2147 0, 2148 0, 2149 0, 2150 0, 2151 /*90*/ 0, 2152 0, 2153 0, 2154 0, 2155 0, 2156 /*95*/ 0, 2157 0, 2158 0, 2159 0}; 2160 2161 EXTERN_C_BEGIN 2162 #undef __FUNCT__ 2163 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2164 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2165 { 2166 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2167 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2168 PetscErrorCode ierr; 2169 2170 PetscFunctionBegin; 2171 if (aij->nonew != 1) { 2172 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2173 } 2174 2175 /* allocate space for values if not already there */ 2176 if (!aij->saved_values) { 2177 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2178 } 2179 2180 /* copy values over */ 2181 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 EXTERN_C_END 2185 2186 EXTERN_C_BEGIN 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2189 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2190 { 2191 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2192 PetscErrorCode ierr; 2193 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2194 2195 PetscFunctionBegin; 2196 if (aij->nonew != 1) { 2197 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2198 } 2199 if (!aij->saved_values) { 2200 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2201 } 2202 2203 /* copy values over */ 2204 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207 EXTERN_C_END 2208 2209 EXTERN_C_BEGIN 2210 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 2211 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*); 2212 EXTERN_C_END 2213 2214 EXTERN_C_BEGIN 2215 #undef __FUNCT__ 2216 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2217 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2218 { 2219 Mat_SeqBAIJ *b; 2220 PetscErrorCode ierr; 2221 PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2222 PetscTruth flg; 2223 2224 PetscFunctionBegin; 2225 2226 B->preallocated = PETSC_TRUE; 2227 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2228 if (nnz && newbs != bs) { 2229 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2230 } 2231 bs = newbs; 2232 2233 mbs = B->m/bs; 2234 nbs = B->n/bs; 2235 bs2 = bs*bs; 2236 2237 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2238 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2239 } 2240 2241 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2242 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2243 if (nnz) { 2244 for (i=0; i<mbs; i++) { 2245 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2246 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2247 } 2248 } 2249 2250 b = (Mat_SeqBAIJ*)B->data; 2251 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2252 B->ops->solve = MatSolve_SeqBAIJ_Update; 2253 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2254 if (!flg) { 2255 switch (bs) { 2256 case 1: 2257 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2258 B->ops->mult = MatMult_SeqBAIJ_1; 2259 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2260 break; 2261 case 2: 2262 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2263 B->ops->mult = MatMult_SeqBAIJ_2; 2264 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2265 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2266 break; 2267 case 3: 2268 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2269 B->ops->mult = MatMult_SeqBAIJ_3; 2270 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2271 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2272 break; 2273 case 4: 2274 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2275 B->ops->mult = MatMult_SeqBAIJ_4; 2276 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2277 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2278 break; 2279 case 5: 2280 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2281 B->ops->mult = MatMult_SeqBAIJ_5; 2282 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2283 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2284 break; 2285 case 6: 2286 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2287 B->ops->mult = MatMult_SeqBAIJ_6; 2288 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2289 break; 2290 case 7: 2291 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2292 B->ops->mult = MatMult_SeqBAIJ_7; 2293 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2294 break; 2295 default: 2296 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2297 B->ops->mult = MatMult_SeqBAIJ_N; 2298 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2299 break; 2300 } 2301 } 2302 B->bs = bs; 2303 b->mbs = mbs; 2304 b->nbs = nbs; 2305 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2306 if (!nnz) { 2307 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2308 else if (nz <= 0) nz = 1; 2309 for (i=0; i<mbs; i++) b->imax[i] = nz; 2310 nz = nz*mbs; 2311 } else { 2312 nz = 0; 2313 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2314 } 2315 2316 /* allocate the matrix space */ 2317 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2318 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2319 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2320 b->j = (PetscInt*)(b->a + nz*bs2); 2321 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2322 b->i = b->j + nz; 2323 b->singlemalloc = PETSC_TRUE; 2324 2325 b->i[0] = 0; 2326 for (i=1; i<mbs+1; i++) { 2327 b->i[i] = b->i[i-1] + b->imax[i-1]; 2328 } 2329 2330 /* b->ilen will count nonzeros in each block row so far. */ 2331 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2332 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2333 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2334 2335 B->bs = bs; 2336 b->bs2 = bs2; 2337 b->mbs = mbs; 2338 b->nz = 0; 2339 b->maxnz = nz*bs2; 2340 B->info.nz_unneeded = (PetscReal)b->maxnz; 2341 PetscFunctionReturn(0); 2342 } 2343 EXTERN_C_END 2344 2345 /*MC 2346 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2347 block sparse compressed row format. 2348 2349 Options Database Keys: 2350 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2351 2352 Level: beginner 2353 2354 .seealso: MatCreateSeqBAIJ 2355 M*/ 2356 2357 EXTERN_C_BEGIN 2358 #undef __FUNCT__ 2359 #define __FUNCT__ "MatCreate_SeqBAIJ" 2360 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2361 { 2362 PetscErrorCode ierr; 2363 PetscMPIInt size; 2364 Mat_SeqBAIJ *b; 2365 2366 PetscFunctionBegin; 2367 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2368 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2369 2370 B->m = B->M = PetscMax(B->m,B->M); 2371 B->n = B->N = PetscMax(B->n,B->N); 2372 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2373 B->data = (void*)b; 2374 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2375 B->factor = 0; 2376 B->mapping = 0; 2377 b->row = 0; 2378 b->col = 0; 2379 b->icol = 0; 2380 b->reallocs = 0; 2381 b->saved_values = 0; 2382 #if defined(PETSC_USE_MAT_SINGLE) 2383 b->setvalueslen = 0; 2384 b->setvaluescopy = PETSC_NULL; 2385 #endif 2386 2387 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2388 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2389 2390 b->sorted = PETSC_FALSE; 2391 b->roworiented = PETSC_TRUE; 2392 b->nonew = 0; 2393 b->diag = 0; 2394 b->solve_work = 0; 2395 b->mult_work = 0; 2396 B->spptr = 0; 2397 B->info.nz_unneeded = (PetscReal)b->maxnz; 2398 b->keepzeroedrows = PETSC_FALSE; 2399 b->xtoy = 0; 2400 b->XtoY = 0; 2401 b->compressedrow.use = PETSC_FALSE; 2402 b->compressedrow.nrows = 0; 2403 b->compressedrow.i = PETSC_NULL; 2404 b->compressedrow.rindex = PETSC_NULL; 2405 b->compressedrow.checked = PETSC_FALSE; 2406 B->same_nonzero = PETSC_FALSE; 2407 2408 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2409 "MatInvertBlockDiagonal_SeqBAIJ", 2410 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2412 "MatStoreValues_SeqBAIJ", 2413 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2414 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2415 "MatRetrieveValues_SeqBAIJ", 2416 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2417 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2418 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2419 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2420 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2421 "MatConvert_SeqBAIJ_SeqAIJ", 2422 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2423 #if !defined(PETSC_USE_64BIT_INT) 2424 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2425 "MatConvert_SeqBAIJ_SeqSBAIJ", 2426 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2427 #endif 2428 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2429 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2430 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2431 PetscFunctionReturn(0); 2432 } 2433 EXTERN_C_END 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2437 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2438 { 2439 Mat C; 2440 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2441 PetscErrorCode ierr; 2442 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2443 2444 PetscFunctionBegin; 2445 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2446 2447 *B = 0; 2448 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2449 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2450 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2451 c = (Mat_SeqBAIJ*)C->data; 2452 2453 C->M = A->M; 2454 C->N = A->N; 2455 C->bs = A->bs; 2456 c->bs2 = a->bs2; 2457 c->mbs = a->mbs; 2458 c->nbs = a->nbs; 2459 2460 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2461 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2462 for (i=0; i<mbs; i++) { 2463 c->imax[i] = a->imax[i]; 2464 c->ilen[i] = a->ilen[i]; 2465 } 2466 2467 /* allocate the matrix space */ 2468 c->singlemalloc = PETSC_TRUE; 2469 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2470 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2471 c->j = (PetscInt*)(c->a + nz*bs2); 2472 c->i = c->j + nz; 2473 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2474 if (mbs > 0) { 2475 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2476 if (cpvalues == MAT_COPY_VALUES) { 2477 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2478 } else { 2479 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2480 } 2481 } 2482 2483 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2484 c->sorted = a->sorted; 2485 c->roworiented = a->roworiented; 2486 c->nonew = a->nonew; 2487 2488 if (a->diag) { 2489 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2490 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2491 for (i=0; i<mbs; i++) { 2492 c->diag[i] = a->diag[i]; 2493 } 2494 } else c->diag = 0; 2495 c->nz = a->nz; 2496 c->maxnz = a->maxnz; 2497 c->solve_work = 0; 2498 c->mult_work = 0; 2499 C->preallocated = PETSC_TRUE; 2500 C->assembled = PETSC_TRUE; 2501 2502 c->compressedrow.use = a->compressedrow.use; 2503 c->compressedrow.nrows = a->compressedrow.nrows; 2504 c->compressedrow.checked = a->compressedrow.checked; 2505 if ( a->compressedrow.checked && a->compressedrow.use){ 2506 i = a->compressedrow.nrows; 2507 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2508 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2509 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2510 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2511 } else { 2512 c->compressedrow.use = PETSC_FALSE; 2513 c->compressedrow.i = PETSC_NULL; 2514 c->compressedrow.rindex = PETSC_NULL; 2515 } 2516 C->same_nonzero = A->same_nonzero; 2517 *B = C; 2518 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatLoad_SeqBAIJ" 2524 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2525 { 2526 Mat_SeqBAIJ *a; 2527 Mat B; 2528 PetscErrorCode ierr; 2529 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2530 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2531 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2532 PetscInt *masked,nmask,tmp,bs2,ishift; 2533 PetscMPIInt size; 2534 int fd; 2535 PetscScalar *aa; 2536 MPI_Comm comm = ((PetscObject)viewer)->comm; 2537 2538 PetscFunctionBegin; 2539 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2540 bs2 = bs*bs; 2541 2542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2543 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2544 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2545 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2546 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2547 M = header[1]; N = header[2]; nz = header[3]; 2548 2549 if (header[3] < 0) { 2550 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2551 } 2552 2553 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2554 2555 /* 2556 This code adds extra rows to make sure the number of rows is 2557 divisible by the blocksize 2558 */ 2559 mbs = M/bs; 2560 extra_rows = bs - M + bs*(mbs); 2561 if (extra_rows == bs) extra_rows = 0; 2562 else mbs++; 2563 if (extra_rows) { 2564 ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2565 } 2566 2567 /* read in row lengths */ 2568 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2569 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2570 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2571 2572 /* read in column indices */ 2573 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2574 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2575 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2576 2577 /* loop over row lengths determining block row lengths */ 2578 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2579 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2580 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2581 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2582 masked = mask + mbs; 2583 rowcount = 0; nzcount = 0; 2584 for (i=0; i<mbs; i++) { 2585 nmask = 0; 2586 for (j=0; j<bs; j++) { 2587 kmax = rowlengths[rowcount]; 2588 for (k=0; k<kmax; k++) { 2589 tmp = jj[nzcount++]/bs; 2590 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2591 } 2592 rowcount++; 2593 } 2594 browlengths[i] += nmask; 2595 /* zero out the mask elements we set */ 2596 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2597 } 2598 2599 /* create our matrix */ 2600 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2601 ierr = MatSetType(B,type);CHKERRQ(ierr); 2602 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2603 a = (Mat_SeqBAIJ*)B->data; 2604 2605 /* set matrix "i" values */ 2606 a->i[0] = 0; 2607 for (i=1; i<= mbs; i++) { 2608 a->i[i] = a->i[i-1] + browlengths[i-1]; 2609 a->ilen[i-1] = browlengths[i-1]; 2610 } 2611 a->nz = 0; 2612 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2613 2614 /* read in nonzero values */ 2615 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2616 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2617 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2618 2619 /* set "a" and "j" values into matrix */ 2620 nzcount = 0; jcount = 0; 2621 for (i=0; i<mbs; i++) { 2622 nzcountb = nzcount; 2623 nmask = 0; 2624 for (j=0; j<bs; j++) { 2625 kmax = rowlengths[i*bs+j]; 2626 for (k=0; k<kmax; k++) { 2627 tmp = jj[nzcount++]/bs; 2628 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2629 } 2630 } 2631 /* sort the masked values */ 2632 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2633 2634 /* set "j" values into matrix */ 2635 maskcount = 1; 2636 for (j=0; j<nmask; j++) { 2637 a->j[jcount++] = masked[j]; 2638 mask[masked[j]] = maskcount++; 2639 } 2640 /* set "a" values into matrix */ 2641 ishift = bs2*a->i[i]; 2642 for (j=0; j<bs; j++) { 2643 kmax = rowlengths[i*bs+j]; 2644 for (k=0; k<kmax; k++) { 2645 tmp = jj[nzcountb]/bs ; 2646 block = mask[tmp] - 1; 2647 point = jj[nzcountb] - bs*tmp; 2648 idx = ishift + bs2*block + j + bs*point; 2649 a->a[idx] = (MatScalar)aa[nzcountb++]; 2650 } 2651 } 2652 /* zero out the mask elements we set */ 2653 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2654 } 2655 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2656 2657 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2658 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2659 ierr = PetscFree(aa);CHKERRQ(ierr); 2660 ierr = PetscFree(jj);CHKERRQ(ierr); 2661 ierr = PetscFree(mask);CHKERRQ(ierr); 2662 2663 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2664 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2665 ierr = MatView_Private(B);CHKERRQ(ierr); 2666 2667 *A = B; 2668 PetscFunctionReturn(0); 2669 } 2670 2671 #undef __FUNCT__ 2672 #define __FUNCT__ "MatCreateSeqBAIJ" 2673 /*@C 2674 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2675 compressed row) format. For good matrix assembly performance the 2676 user should preallocate the matrix storage by setting the parameter nz 2677 (or the array nnz). By setting these parameters accurately, performance 2678 during matrix assembly can be increased by more than a factor of 50. 2679 2680 Collective on MPI_Comm 2681 2682 Input Parameters: 2683 + comm - MPI communicator, set to PETSC_COMM_SELF 2684 . bs - size of block 2685 . m - number of rows 2686 . n - number of columns 2687 . nz - number of nonzero blocks per block row (same for all rows) 2688 - nnz - array containing the number of nonzero blocks in the various block rows 2689 (possibly different for each block row) or PETSC_NULL 2690 2691 Output Parameter: 2692 . A - the matrix 2693 2694 Options Database Keys: 2695 . -mat_no_unroll - uses code that does not unroll the loops in the 2696 block calculations (much slower) 2697 . -mat_block_size - size of the blocks to use 2698 2699 Level: intermediate 2700 2701 Notes: 2702 The number of rows and columns must be divisible by blocksize. 2703 2704 If the nnz parameter is given then the nz parameter is ignored 2705 2706 A nonzero block is any block that as 1 or more nonzeros in it 2707 2708 The block AIJ format is fully compatible with standard Fortran 77 2709 storage. That is, the stored row and column indices can begin at 2710 either one (as in Fortran) or zero. See the users' manual for details. 2711 2712 Specify the preallocated storage with either nz or nnz (not both). 2713 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2714 allocation. For additional details, see the users manual chapter on 2715 matrices. 2716 2717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2718 @*/ 2719 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2720 { 2721 PetscErrorCode ierr; 2722 2723 PetscFunctionBegin; 2724 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2725 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2726 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2727 PetscFunctionReturn(0); 2728 } 2729 2730 #undef __FUNCT__ 2731 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2732 /*@C 2733 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2734 per row in the matrix. For good matrix assembly performance the 2735 user should preallocate the matrix storage by setting the parameter nz 2736 (or the array nnz). By setting these parameters accurately, performance 2737 during matrix assembly can be increased by more than a factor of 50. 2738 2739 Collective on MPI_Comm 2740 2741 Input Parameters: 2742 + A - the matrix 2743 . bs - size of block 2744 . nz - number of block nonzeros per block row (same for all rows) 2745 - nnz - array containing the number of block nonzeros in the various block rows 2746 (possibly different for each block row) or PETSC_NULL 2747 2748 Options Database Keys: 2749 . -mat_no_unroll - uses code that does not unroll the loops in the 2750 block calculations (much slower) 2751 . -mat_block_size - size of the blocks to use 2752 2753 Level: intermediate 2754 2755 Notes: 2756 If the nnz parameter is given then the nz parameter is ignored 2757 2758 The block AIJ format is fully compatible with standard Fortran 77 2759 storage. That is, the stored row and column indices can begin at 2760 either one (as in Fortran) or zero. See the users' manual for details. 2761 2762 Specify the preallocated storage with either nz or nnz (not both). 2763 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2764 allocation. For additional details, see the users manual chapter on 2765 matrices. 2766 2767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2768 @*/ 2769 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2770 { 2771 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2772 2773 PetscFunctionBegin; 2774 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2775 if (f) { 2776 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2777 } 2778 PetscFunctionReturn(0); 2779 } 2780 2781