1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for Fun3d sequential benchmark 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 stepval = (n-1)*4; 590 for (k=0; k<m; k++) { /* loop over added rows */ 591 row = im[k]; 592 rp = aj + ai[row]; 593 ap = aa + 16*ai[row]; 594 nrow = ailen[row]; 595 low = 0; 596 for (l=0; l<n; l++) { /* loop over added columns */ 597 col = in[l]; 598 value = v + k*(stepval+4)*4 + l*4; 599 low = 0; high = nrow; 600 while (high-low > 7) { 601 t = (low+high)/2; 602 if (rp[t] > col) high = t; 603 else low = t; 604 } 605 for (i=low; i<high; i++) { 606 if (rp[i] > col) break; 607 if (rp[i] == col) { 608 bap = ap + 16*i; 609 for (ii=0; ii<4; ii++,value+=stepval) { 610 for (jj=ii; jj<16; jj+=4) { 611 bap[jj] += *value++; 612 } 613 } 614 goto noinsert2; 615 } 616 } 617 N = nrow++ - 1; 618 /* shift up all the later entries in this row */ 619 for (ii=N; ii>=i; ii--) { 620 rp[ii+1] = rp[ii]; 621 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 622 } 623 if (N >= i) { 624 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 625 } 626 rp[i] = col; 627 bap = ap + 16*i; 628 for (ii=0; ii<4; ii++,value+=stepval) { 629 for (jj=ii; jj<16; jj+=4) { 630 bap[jj] = *value++; 631 } 632 } 633 noinsert2:; 634 low = i; 635 } 636 ailen[row] = nrow; 637 } 638 PetscFunctionReturnVoid(); 639 } 640 EXTERN_C_END 641 642 #if defined(PETSC_HAVE_FORTRAN_CAPS) 643 #define matsetvalues4_ MATSETVALUES4 644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 645 #define matsetvalues4_ matsetvalues4 646 #endif 647 648 EXTERN_C_BEGIN 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSetValues4_" 651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 652 { 653 Mat A = *AA; 654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 655 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 656 PetscInt *ai=a->i,*ailen=a->ilen; 657 PetscInt *aj=a->j,brow,bcol; 658 PetscInt ridx,cidx; 659 MatScalar *ap,value,*aa=a->a,*bap; 660 661 PetscFunctionBegin; 662 for (k=0; k<m; k++) { /* loop over added rows */ 663 row = im[k]; brow = row/4; 664 rp = aj + ai[brow]; 665 ap = aa + 16*ai[brow]; 666 nrow = ailen[brow]; 667 low = 0; 668 for (l=0; l<n; l++) { /* loop over added columns */ 669 col = in[l]; bcol = col/4; 670 ridx = row % 4; cidx = col % 4; 671 value = v[l + k*n]; 672 low = 0; high = nrow; 673 while (high-low > 7) { 674 t = (low+high)/2; 675 if (rp[t] > bcol) high = t; 676 else low = t; 677 } 678 for (i=low; i<high; i++) { 679 if (rp[i] > bcol) break; 680 if (rp[i] == bcol) { 681 bap = ap + 16*i + 4*cidx + ridx; 682 *bap += value; 683 goto noinsert1; 684 } 685 } 686 N = nrow++ - 1; 687 /* shift up all the later entries in this row */ 688 for (ii=N; ii>=i; ii--) { 689 rp[ii+1] = rp[ii]; 690 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 691 } 692 if (N>=i) { 693 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 694 } 695 rp[i] = bcol; 696 ap[16*i + 4*cidx + ridx] = value; 697 noinsert1:; 698 low = i; 699 } 700 ailen[brow] = nrow; 701 } 702 PetscFunctionReturnVoid(); 703 } 704 EXTERN_C_END 705 706 /* UGLY, ugly, ugly 707 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 708 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 709 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 710 converts the entries into single precision and then calls ..._MatScalar() to put them 711 into the single precision data structures. 712 */ 713 #if defined(PETSC_USE_MAT_SINGLE) 714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 715 #else 716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 717 #endif 718 719 #define CHUNKSIZE 10 720 721 /* 722 Checks for missing diagonals 723 */ 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 727 { 728 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 729 PetscErrorCode ierr; 730 PetscInt *diag,*jj = a->j,i; 731 732 PetscFunctionBegin; 733 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 734 diag = a->diag; 735 for (i=0; i<a->mbs; i++) { 736 if (jj[diag[i]] != i) { 737 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 738 } 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 746 { 747 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscInt i,j,*diag,m = a->mbs; 750 751 PetscFunctionBegin; 752 if (a->diag) PetscFunctionReturn(0); 753 754 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 755 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 756 for (i=0; i<m; i++) { 757 diag[i] = a->i[i+1]; 758 for (j=a->i[i]; j<a->i[i+1]; j++) { 759 if (a->j[j] == i) { 760 diag[i] = j; 761 break; 762 } 763 } 764 } 765 a->diag = diag; 766 PetscFunctionReturn(0); 767 } 768 769 770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 775 { 776 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 777 PetscErrorCode ierr; 778 PetscInt n = a->mbs,i; 779 780 PetscFunctionBegin; 781 *nn = n; 782 if (!ia) PetscFunctionReturn(0); 783 if (symmetric) { 784 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 785 } else if (oshift == 1) { 786 /* temporarily add 1 to i and j indices */ 787 PetscInt nz = a->i[n]; 788 for (i=0; i<nz; i++) a->j[i]++; 789 for (i=0; i<n+1; i++) a->i[i]++; 790 *ia = a->i; *ja = a->j; 791 } else { 792 *ia = a->i; *ja = a->j; 793 } 794 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 801 { 802 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 803 PetscErrorCode ierr; 804 PetscInt i,n = a->mbs; 805 806 PetscFunctionBegin; 807 if (!ia) PetscFunctionReturn(0); 808 if (symmetric) { 809 ierr = PetscFree(*ia);CHKERRQ(ierr); 810 ierr = PetscFree(*ja);CHKERRQ(ierr); 811 } else if (oshift == 1) { 812 PetscInt nz = a->i[n]-1; 813 for (i=0; i<nz; i++) a->j[i]--; 814 for (i=0; i<n+1; i++) a->i[i]--; 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatDestroy_SeqBAIJ" 821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 822 { 823 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 #if defined(PETSC_USE_LOG) 828 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 829 #endif 830 if (!a->singlemalloc) { 831 ierr = PetscFree(a->a);CHKERRQ(ierr); 832 ierr = PetscFree(a->i);CHKERRQ(ierr); 833 ierr = PetscFree(a->j);CHKERRQ(ierr); 834 } else { 835 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 836 } 837 if (a->row) { 838 ierr = ISDestroy(a->row);CHKERRQ(ierr); 839 } 840 if (a->col) { 841 ierr = ISDestroy(a->col);CHKERRQ(ierr); 842 } 843 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 844 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 845 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 846 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 847 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 848 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 849 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 850 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 851 #if defined(PETSC_USE_MAT_SINGLE) 852 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 853 #endif 854 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 855 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 856 857 ierr = PetscFree(a);CHKERRQ(ierr); 858 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatSetOption_SeqBAIJ" 871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 872 { 873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 switch (op) { 878 case MAT_ROW_ORIENTED: 879 a->roworiented = PETSC_TRUE; 880 break; 881 case MAT_COLUMN_ORIENTED: 882 a->roworiented = PETSC_FALSE; 883 break; 884 case MAT_COLUMNS_SORTED: 885 a->sorted = PETSC_TRUE; 886 break; 887 case MAT_COLUMNS_UNSORTED: 888 a->sorted = PETSC_FALSE; 889 break; 890 case MAT_KEEP_ZEROED_ROWS: 891 a->keepzeroedrows = PETSC_TRUE; 892 break; 893 case MAT_NO_NEW_NONZERO_LOCATIONS: 894 a->nonew = 1; 895 break; 896 case MAT_NEW_NONZERO_LOCATION_ERR: 897 a->nonew = -1; 898 break; 899 case MAT_NEW_NONZERO_ALLOCATION_ERR: 900 a->nonew = -2; 901 break; 902 case MAT_YES_NEW_NONZERO_LOCATIONS: 903 a->nonew = 0; 904 break; 905 case MAT_ROWS_SORTED: 906 case MAT_ROWS_UNSORTED: 907 case MAT_YES_NEW_DIAGONALS: 908 case MAT_IGNORE_OFF_PROC_ENTRIES: 909 case MAT_USE_HASH_TABLE: 910 ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 911 break; 912 case MAT_NO_NEW_DIAGONALS: 913 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 914 case MAT_SYMMETRIC: 915 case MAT_STRUCTURALLY_SYMMETRIC: 916 case MAT_NOT_SYMMETRIC: 917 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 918 case MAT_HERMITIAN: 919 case MAT_NOT_HERMITIAN: 920 case MAT_SYMMETRY_ETERNAL: 921 case MAT_NOT_SYMMETRY_ETERNAL: 922 break; 923 default: 924 SETERRQ(PETSC_ERR_SUP,"unknown option"); 925 } 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatGetRow_SeqBAIJ" 931 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 932 { 933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 934 PetscErrorCode ierr; 935 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 936 MatScalar *aa,*aa_i; 937 PetscScalar *v_i; 938 939 PetscFunctionBegin; 940 bs = A->bs; 941 ai = a->i; 942 aj = a->j; 943 aa = a->a; 944 bs2 = a->bs2; 945 946 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 947 948 bn = row/bs; /* Block number */ 949 bp = row % bs; /* Block Position */ 950 M = ai[bn+1] - ai[bn]; 951 *nz = bs*M; 952 953 if (v) { 954 *v = 0; 955 if (*nz) { 956 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 957 for (i=0; i<M; i++) { /* for each block in the block row */ 958 v_i = *v + i*bs; 959 aa_i = aa + bs2*(ai[bn] + i); 960 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 961 } 962 } 963 } 964 965 if (idx) { 966 *idx = 0; 967 if (*nz) { 968 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 969 for (i=0; i<M; i++) { /* for each block in the block row */ 970 idx_i = *idx + i*bs; 971 itmp = bs*aj[ai[bn] + i]; 972 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 973 } 974 } 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 981 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 982 { 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 987 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatTranspose_SeqBAIJ" 993 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 994 { 995 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 996 Mat C; 997 PetscErrorCode ierr; 998 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 999 PetscInt *rows,*cols,bs2=a->bs2; 1000 PetscScalar *array; 1001 1002 PetscFunctionBegin; 1003 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1004 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1005 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1006 1007 #if defined(PETSC_USE_MAT_SINGLE) 1008 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1009 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1010 #else 1011 array = a->a; 1012 #endif 1013 1014 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1015 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 1016 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1017 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1018 ierr = PetscFree(col);CHKERRQ(ierr); 1019 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1020 cols = rows + bs; 1021 for (i=0; i<mbs; i++) { 1022 cols[0] = i*bs; 1023 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1024 len = ai[i+1] - ai[i]; 1025 for (j=0; j<len; j++) { 1026 rows[0] = (*aj++)*bs; 1027 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1028 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1029 array += bs2; 1030 } 1031 } 1032 ierr = PetscFree(rows);CHKERRQ(ierr); 1033 #if defined(PETSC_USE_MAT_SINGLE) 1034 ierr = PetscFree(array); 1035 #endif 1036 1037 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 1040 if (B) { 1041 *B = C; 1042 } else { 1043 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1044 } 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1050 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1051 { 1052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1053 PetscErrorCode ierr; 1054 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1055 int fd; 1056 PetscScalar *aa; 1057 FILE *file; 1058 1059 PetscFunctionBegin; 1060 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1061 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1062 col_lens[0] = MAT_FILE_COOKIE; 1063 1064 col_lens[1] = A->m; 1065 col_lens[2] = A->n; 1066 col_lens[3] = a->nz*bs2; 1067 1068 /* store lengths of each row and write (including header) to file */ 1069 count = 0; 1070 for (i=0; i<a->mbs; i++) { 1071 for (j=0; j<bs; j++) { 1072 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1073 } 1074 } 1075 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1076 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1077 1078 /* store column indices (zero start index) */ 1079 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1080 count = 0; 1081 for (i=0; i<a->mbs; i++) { 1082 for (j=0; j<bs; j++) { 1083 for (k=a->i[i]; k<a->i[i+1]; k++) { 1084 for (l=0; l<bs; l++) { 1085 jj[count++] = bs*a->j[k] + l; 1086 } 1087 } 1088 } 1089 } 1090 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1091 ierr = PetscFree(jj);CHKERRQ(ierr); 1092 1093 /* store nonzero values */ 1094 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1095 count = 0; 1096 for (i=0; i<a->mbs; i++) { 1097 for (j=0; j<bs; j++) { 1098 for (k=a->i[i]; k<a->i[i+1]; k++) { 1099 for (l=0; l<bs; l++) { 1100 aa[count++] = a->a[bs2*k + l*bs + j]; 1101 } 1102 } 1103 } 1104 } 1105 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1106 ierr = PetscFree(aa);CHKERRQ(ierr); 1107 1108 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1109 if (file) { 1110 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1117 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1118 { 1119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1120 PetscErrorCode ierr; 1121 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1122 PetscViewerFormat format; 1123 1124 PetscFunctionBegin; 1125 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1126 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1127 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1128 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1129 Mat aij; 1130 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1131 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1132 ierr = MatDestroy(aij);CHKERRQ(ierr); 1133 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1134 PetscFunctionReturn(0); 1135 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1136 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1137 for (i=0; i<a->mbs; i++) { 1138 for (j=0; j<bs; j++) { 1139 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1140 for (k=a->i[i]; k<a->i[i+1]; k++) { 1141 for (l=0; l<bs; l++) { 1142 #if defined(PETSC_USE_COMPLEX) 1143 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1144 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1145 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1146 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1147 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1148 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1150 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1151 } 1152 #else 1153 if (a->a[bs2*k + l*bs + j] != 0.0) { 1154 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1155 } 1156 #endif 1157 } 1158 } 1159 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1160 } 1161 } 1162 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1163 } else { 1164 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1165 for (i=0; i<a->mbs; i++) { 1166 for (j=0; j<bs; j++) { 1167 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1168 for (k=a->i[i]; k<a->i[i+1]; k++) { 1169 for (l=0; l<bs; l++) { 1170 #if defined(PETSC_USE_COMPLEX) 1171 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1172 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1173 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1174 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1176 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1177 } else { 1178 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1179 } 1180 #else 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1182 #endif 1183 } 1184 } 1185 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1186 } 1187 } 1188 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1189 } 1190 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1196 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1197 { 1198 Mat A = (Mat) Aa; 1199 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1202 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1203 MatScalar *aa; 1204 PetscViewer viewer; 1205 1206 PetscFunctionBegin; 1207 1208 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1209 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1210 1211 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1212 1213 /* loop over matrix elements drawing boxes */ 1214 color = PETSC_DRAW_BLUE; 1215 for (i=0,row=0; i<mbs; i++,row+=bs) { 1216 for (j=a->i[i]; j<a->i[i+1]; j++) { 1217 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1218 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1219 aa = a->a + j*bs2; 1220 for (k=0; k<bs; k++) { 1221 for (l=0; l<bs; l++) { 1222 if (PetscRealPart(*aa++) >= 0.) continue; 1223 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1224 } 1225 } 1226 } 1227 } 1228 color = PETSC_DRAW_CYAN; 1229 for (i=0,row=0; i<mbs; i++,row+=bs) { 1230 for (j=a->i[i]; j<a->i[i+1]; j++) { 1231 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1232 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1233 aa = a->a + j*bs2; 1234 for (k=0; k<bs; k++) { 1235 for (l=0; l<bs; l++) { 1236 if (PetscRealPart(*aa++) != 0.) continue; 1237 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1238 } 1239 } 1240 } 1241 } 1242 1243 color = PETSC_DRAW_RED; 1244 for (i=0,row=0; i<mbs; i++,row+=bs) { 1245 for (j=a->i[i]; j<a->i[i+1]; j++) { 1246 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1247 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1248 aa = a->a + j*bs2; 1249 for (k=0; k<bs; k++) { 1250 for (l=0; l<bs; l++) { 1251 if (PetscRealPart(*aa++) <= 0.) continue; 1252 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1253 } 1254 } 1255 } 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1262 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1263 { 1264 PetscErrorCode ierr; 1265 PetscReal xl,yl,xr,yr,w,h; 1266 PetscDraw draw; 1267 PetscTruth isnull; 1268 1269 PetscFunctionBegin; 1270 1271 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1272 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1273 1274 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1275 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1276 xr += w; yr += h; xl = -w; yl = -h; 1277 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1278 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1279 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_SeqBAIJ" 1285 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1286 { 1287 PetscErrorCode ierr; 1288 PetscTruth iascii,isbinary,isdraw; 1289 1290 PetscFunctionBegin; 1291 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1292 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1294 if (iascii){ 1295 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1296 } else if (isbinary) { 1297 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1298 } else if (isdraw) { 1299 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1300 } else { 1301 Mat B; 1302 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1303 ierr = MatView(B,viewer);CHKERRQ(ierr); 1304 ierr = MatDestroy(B);CHKERRQ(ierr); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1312 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1316 PetscInt *ai = a->i,*ailen = a->ilen; 1317 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1318 MatScalar *ap,*aa = a->a,zero = 0.0; 1319 1320 PetscFunctionBegin; 1321 for (k=0; k<m; k++) { /* loop over rows */ 1322 row = im[k]; brow = row/bs; 1323 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1324 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1325 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1326 nrow = ailen[brow]; 1327 for (l=0; l<n; l++) { /* loop over columns */ 1328 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1329 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1330 col = in[l] ; 1331 bcol = col/bs; 1332 cidx = col%bs; 1333 ridx = row%bs; 1334 high = nrow; 1335 low = 0; /* assume unsorted */ 1336 while (high-low > 5) { 1337 t = (low+high)/2; 1338 if (rp[t] > bcol) high = t; 1339 else low = t; 1340 } 1341 for (i=low; i<high; i++) { 1342 if (rp[i] > bcol) break; 1343 if (rp[i] == bcol) { 1344 *v++ = ap[bs2*i+bs*cidx+ridx]; 1345 goto finished; 1346 } 1347 } 1348 *v++ = zero; 1349 finished:; 1350 } 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #if defined(PETSC_USE_MAT_SINGLE) 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1358 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1359 { 1360 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1361 PetscErrorCode ierr; 1362 PetscInt i,N = m*n*b->bs2; 1363 MatScalar *vsingle; 1364 1365 PetscFunctionBegin; 1366 if (N > b->setvalueslen) { 1367 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1368 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1369 b->setvalueslen = N; 1370 } 1371 vsingle = b->setvaluescopy; 1372 for (i=0; i<N; i++) { 1373 vsingle[i] = v[i]; 1374 } 1375 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 #endif 1379 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1383 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1384 { 1385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1386 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1387 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1388 PetscErrorCode ierr; 1389 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1390 PetscTruth roworiented=a->roworiented; 1391 const MatScalar *value = v; 1392 MatScalar *ap,*aa = a->a,*bap; 1393 1394 PetscFunctionBegin; 1395 if (roworiented) { 1396 stepval = (n-1)*bs; 1397 } else { 1398 stepval = (m-1)*bs; 1399 } 1400 for (k=0; k<m; k++) { /* loop over added rows */ 1401 row = im[k]; 1402 if (row < 0) continue; 1403 #if defined(PETSC_USE_DEBUG) 1404 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1405 #endif 1406 rp = aj + ai[row]; 1407 ap = aa + bs2*ai[row]; 1408 rmax = imax[row]; 1409 nrow = ailen[row]; 1410 low = 0; 1411 high = nrow; 1412 for (l=0; l<n; l++) { /* loop over added columns */ 1413 if (in[l] < 0) continue; 1414 #if defined(PETSC_USE_DEBUG) 1415 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1416 #endif 1417 col = in[l]; 1418 if (roworiented) { 1419 value = v + k*(stepval+bs)*bs + l*bs; 1420 } else { 1421 value = v + l*(stepval+bs)*bs + k*bs; 1422 } 1423 if (col < lastcol) low = 0; else high = nrow; 1424 lastcol = col; 1425 while (high-low > 7) { 1426 t = (low+high)/2; 1427 if (rp[t] > col) high = t; 1428 else low = t; 1429 } 1430 for (i=low; i<high; i++) { 1431 if (rp[i] > col) break; 1432 if (rp[i] == col) { 1433 bap = ap + bs2*i; 1434 if (roworiented) { 1435 if (is == ADD_VALUES) { 1436 for (ii=0; ii<bs; ii++,value+=stepval) { 1437 for (jj=ii; jj<bs2; jj+=bs) { 1438 bap[jj] += *value++; 1439 } 1440 } 1441 } else { 1442 for (ii=0; ii<bs; ii++,value+=stepval) { 1443 for (jj=ii; jj<bs2; jj+=bs) { 1444 bap[jj] = *value++; 1445 } 1446 } 1447 } 1448 } else { 1449 if (is == ADD_VALUES) { 1450 for (ii=0; ii<bs; ii++,value+=stepval) { 1451 for (jj=0; jj<bs; jj++) { 1452 *bap++ += *value++; 1453 } 1454 } 1455 } else { 1456 for (ii=0; ii<bs; ii++,value+=stepval) { 1457 for (jj=0; jj<bs; jj++) { 1458 *bap++ = *value++; 1459 } 1460 } 1461 } 1462 } 1463 goto noinsert2; 1464 } 1465 } 1466 if (nonew == 1) goto noinsert2; 1467 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1468 if (nrow >= rmax) { 1469 /* there is no extra room in row, therefore enlarge */ 1470 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1471 MatScalar *new_a; 1472 1473 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1474 1475 /* malloc new storage space */ 1476 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 1477 1478 /* copy over old data into new slots */ 1479 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1480 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1481 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1482 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1483 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1484 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1485 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1486 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1487 /* free up old matrix storage */ 1488 if (!a->singlemalloc) { 1489 ierr = PetscFree(a->a);CHKERRQ(ierr); 1490 ierr = PetscFree(a->i);CHKERRQ(ierr); 1491 ierr = PetscFree(a->j);CHKERRQ(ierr); 1492 } else { 1493 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 1494 } 1495 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1496 a->singlemalloc = PETSC_TRUE; 1497 1498 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1499 rmax = imax[row] = imax[row] + CHUNKSIZE; 1500 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1501 a->maxnz += bs2*CHUNKSIZE; 1502 a->reallocs++; 1503 a->nz++; 1504 } 1505 N = nrow++ - 1; 1506 /* shift up all the later entries in this row */ 1507 for (ii=N; ii>=i; ii--) { 1508 rp[ii+1] = rp[ii]; 1509 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1510 } 1511 if (N >= i) { 1512 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1513 } 1514 rp[i] = col; 1515 bap = ap + bs2*i; 1516 if (roworiented) { 1517 for (ii=0; ii<bs; ii++,value+=stepval) { 1518 for (jj=ii; jj<bs2; jj+=bs) { 1519 bap[jj] = *value++; 1520 } 1521 } 1522 } else { 1523 for (ii=0; ii<bs; ii++,value+=stepval) { 1524 for (jj=0; jj<bs; jj++) { 1525 *bap++ = *value++; 1526 } 1527 } 1528 } 1529 noinsert2:; 1530 low = i; 1531 } 1532 ailen[row] = nrow; 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1539 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1540 { 1541 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1542 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1543 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1544 PetscErrorCode ierr; 1545 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1546 MatScalar *aa = a->a,*ap; 1547 PetscReal ratio=0.6; 1548 1549 PetscFunctionBegin; 1550 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1551 1552 if (m) rmax = ailen[0]; 1553 for (i=1; i<mbs; i++) { 1554 /* move each row back by the amount of empty slots (fshift) before it*/ 1555 fshift += imax[i-1] - ailen[i-1]; 1556 rmax = PetscMax(rmax,ailen[i]); 1557 if (fshift) { 1558 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1559 N = ailen[i]; 1560 for (j=0; j<N; j++) { 1561 ip[j-fshift] = ip[j]; 1562 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1563 } 1564 } 1565 ai[i] = ai[i-1] + ailen[i-1]; 1566 } 1567 if (mbs) { 1568 fshift += imax[mbs-1] - ailen[mbs-1]; 1569 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1570 } 1571 /* reset ilen and imax for each row */ 1572 for (i=0; i<mbs; i++) { 1573 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1574 } 1575 a->nz = ai[mbs]; 1576 1577 /* diagonals may have moved, so kill the diagonal pointers */ 1578 a->idiagvalid = PETSC_FALSE; 1579 if (fshift && a->diag) { 1580 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1581 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1582 a->diag = 0; 1583 } 1584 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1585 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1586 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1587 a->reallocs = 0; 1588 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1589 1590 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1591 if (a->compressedrow.use){ 1592 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1593 } 1594 1595 A->same_nonzero = PETSC_TRUE; 1596 PetscFunctionReturn(0); 1597 } 1598 1599 /* 1600 This function returns an array of flags which indicate the locations of contiguous 1601 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1602 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1603 Assume: sizes should be long enough to hold all the values. 1604 */ 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1607 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1608 { 1609 PetscInt i,j,k,row; 1610 PetscTruth flg; 1611 1612 PetscFunctionBegin; 1613 for (i=0,j=0; i<n; j++) { 1614 row = idx[i]; 1615 if (row%bs!=0) { /* Not the begining of a block */ 1616 sizes[j] = 1; 1617 i++; 1618 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1619 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1620 i++; 1621 } else { /* Begining of the block, so check if the complete block exists */ 1622 flg = PETSC_TRUE; 1623 for (k=1; k<bs; k++) { 1624 if (row+k != idx[i+k]) { /* break in the block */ 1625 flg = PETSC_FALSE; 1626 break; 1627 } 1628 } 1629 if (flg) { /* No break in the bs */ 1630 sizes[j] = bs; 1631 i+= bs; 1632 } else { 1633 sizes[j] = 1; 1634 i++; 1635 } 1636 } 1637 } 1638 *bs_max = j; 1639 PetscFunctionReturn(0); 1640 } 1641 1642 #undef __FUNCT__ 1643 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1644 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1645 { 1646 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1647 PetscErrorCode ierr; 1648 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1649 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1650 PetscScalar zero = 0.0; 1651 MatScalar *aa; 1652 1653 PetscFunctionBegin; 1654 /* Make a copy of the IS and sort it */ 1655 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1656 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1657 1658 /* allocate memory for rows,sizes */ 1659 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1660 sizes = rows + is_n; 1661 1662 /* copy IS values to rows, and sort them */ 1663 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1664 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1665 if (baij->keepzeroedrows) { 1666 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1667 bs_max = is_n; 1668 A->same_nonzero = PETSC_TRUE; 1669 } else { 1670 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1671 A->same_nonzero = PETSC_FALSE; 1672 } 1673 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1674 1675 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1676 row = rows[j]; 1677 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1678 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1679 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1680 if (sizes[i] == bs && !baij->keepzeroedrows) { 1681 if (diag) { 1682 if (baij->ilen[row/bs] > 0) { 1683 baij->ilen[row/bs] = 1; 1684 baij->j[baij->i[row/bs]] = row/bs; 1685 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1686 } 1687 /* Now insert all the diagonal values for this bs */ 1688 for (k=0; k<bs; k++) { 1689 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1690 } 1691 } else { /* (!diag) */ 1692 baij->ilen[row/bs] = 0; 1693 } /* end (!diag) */ 1694 } else { /* (sizes[i] != bs) */ 1695 #if defined (PETSC_USE_DEBUG) 1696 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1697 #endif 1698 for (k=0; k<count; k++) { 1699 aa[0] = zero; 1700 aa += bs; 1701 } 1702 if (diag) { 1703 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1704 } 1705 } 1706 } 1707 1708 ierr = PetscFree(rows);CHKERRQ(ierr); 1709 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1715 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1716 { 1717 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1718 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1719 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1720 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1721 PetscErrorCode ierr; 1722 PetscInt ridx,cidx,bs2=a->bs2; 1723 PetscTruth roworiented=a->roworiented; 1724 MatScalar *ap,value,*aa=a->a,*bap; 1725 1726 PetscFunctionBegin; 1727 for (k=0; k<m; k++) { /* loop over added rows */ 1728 row = im[k]; brow = row/bs; 1729 if (row < 0) continue; 1730 #if defined(PETSC_USE_DEBUG) 1731 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1732 #endif 1733 rp = aj + ai[brow]; 1734 ap = aa + bs2*ai[brow]; 1735 rmax = imax[brow]; 1736 nrow = ailen[brow]; 1737 low = 0; 1738 high = nrow; 1739 for (l=0; l<n; l++) { /* loop over added columns */ 1740 if (in[l] < 0) continue; 1741 #if defined(PETSC_USE_DEBUG) 1742 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1743 #endif 1744 col = in[l]; bcol = col/bs; 1745 ridx = row % bs; cidx = col % bs; 1746 if (roworiented) { 1747 value = v[l + k*n]; 1748 } else { 1749 value = v[k + l*m]; 1750 } 1751 if (col < lastcol) low = 0; else high = nrow; 1752 lastcol = col; 1753 while (high-low > 7) { 1754 t = (low+high)/2; 1755 if (rp[t] > bcol) high = t; 1756 else low = t; 1757 } 1758 for (i=low; i<high; i++) { 1759 if (rp[i] > bcol) break; 1760 if (rp[i] == bcol) { 1761 bap = ap + bs2*i + bs*cidx + ridx; 1762 if (is == ADD_VALUES) *bap += value; 1763 else *bap = value; 1764 goto noinsert1; 1765 } 1766 } 1767 if (nonew == 1) goto noinsert1; 1768 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1769 if (nrow >= rmax) { 1770 /* there is no extra room in row, therefore enlarge */ 1771 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1772 MatScalar *new_a; 1773 1774 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1775 1776 /* Malloc new storage space */ 1777 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 1778 1779 /* copy over old data into new slots */ 1780 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1781 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1782 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1783 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1784 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1785 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1786 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1787 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1788 /* free up old matrix storage */ 1789 /* free up old matrix storage */ 1790 if (!a->singlemalloc) { 1791 ierr = PetscFree(a->a);CHKERRQ(ierr); 1792 ierr = PetscFree(a->i);CHKERRQ(ierr); 1793 ierr = PetscFree(a->j);CHKERRQ(ierr); 1794 } else { 1795 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 1796 } 1797 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1798 a->singlemalloc = PETSC_TRUE; 1799 1800 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1801 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1802 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1803 a->maxnz += bs2*CHUNKSIZE; 1804 a->reallocs++; 1805 a->nz++; 1806 } 1807 N = nrow++ - 1; 1808 /* shift up all the later entries in this row */ 1809 for (ii=N; ii>=i; ii--) { 1810 rp[ii+1] = rp[ii]; 1811 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1812 } 1813 if (N>=i) { 1814 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1815 } 1816 rp[i] = bcol; 1817 ap[bs2*i + bs*cidx + ridx] = value; 1818 noinsert1:; 1819 low = i; 1820 } 1821 ailen[brow] = nrow; 1822 } 1823 A->same_nonzero = PETSC_FALSE; 1824 PetscFunctionReturn(0); 1825 } 1826 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1830 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1831 { 1832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1833 Mat outA; 1834 PetscErrorCode ierr; 1835 PetscTruth row_identity,col_identity; 1836 1837 PetscFunctionBegin; 1838 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1839 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1840 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1841 if (!row_identity || !col_identity) { 1842 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1843 } 1844 1845 outA = inA; 1846 inA->factor = FACTOR_LU; 1847 1848 if (!a->diag) { 1849 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1850 } 1851 1852 a->row = row; 1853 a->col = col; 1854 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1855 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1856 1857 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1858 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1859 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1860 1861 /* 1862 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1863 for ILU(0) factorization with natural ordering 1864 */ 1865 if (inA->bs < 8) { 1866 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1867 } else { 1868 if (!a->solve_work) { 1869 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1870 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1871 } 1872 } 1873 1874 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1875 1876 PetscFunctionReturn(0); 1877 } 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1880 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1881 { 1882 static PetscTruth called = PETSC_FALSE; 1883 MPI_Comm comm = A->comm; 1884 PetscErrorCode ierr; 1885 1886 PetscFunctionBegin; 1887 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1888 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1889 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 EXTERN_C_BEGIN 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1896 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1897 { 1898 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1899 PetscInt i,nz,nbs; 1900 1901 PetscFunctionBegin; 1902 nz = baij->maxnz/baij->bs2; 1903 nbs = baij->nbs; 1904 for (i=0; i<nz; i++) { 1905 baij->j[i] = indices[i]; 1906 } 1907 baij->nz = nz; 1908 for (i=0; i<nbs; i++) { 1909 baij->ilen[i] = baij->imax[i]; 1910 } 1911 1912 PetscFunctionReturn(0); 1913 } 1914 EXTERN_C_END 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1918 /*@ 1919 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1920 in the matrix. 1921 1922 Input Parameters: 1923 + mat - the SeqBAIJ matrix 1924 - indices - the column indices 1925 1926 Level: advanced 1927 1928 Notes: 1929 This can be called if you have precomputed the nonzero structure of the 1930 matrix and want to provide it to the matrix object to improve the performance 1931 of the MatSetValues() operation. 1932 1933 You MUST have set the correct numbers of nonzeros per row in the call to 1934 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1935 1936 MUST be called before any calls to MatSetValues(); 1937 1938 @*/ 1939 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1940 { 1941 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1942 1943 PetscFunctionBegin; 1944 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1945 PetscValidPointer(indices,2); 1946 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1947 if (f) { 1948 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1949 } else { 1950 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1951 } 1952 PetscFunctionReturn(0); 1953 } 1954 1955 #undef __FUNCT__ 1956 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1957 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1958 { 1959 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1960 PetscErrorCode ierr; 1961 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1962 PetscReal atmp; 1963 PetscScalar *x,zero = 0.0; 1964 MatScalar *aa; 1965 PetscInt ncols,brow,krow,kcol; 1966 1967 PetscFunctionBegin; 1968 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1969 bs = A->bs; 1970 aa = a->a; 1971 ai = a->i; 1972 aj = a->j; 1973 mbs = a->mbs; 1974 1975 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1976 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1977 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1978 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1979 for (i=0; i<mbs; i++) { 1980 ncols = ai[1] - ai[0]; ai++; 1981 brow = bs*i; 1982 for (j=0; j<ncols; j++){ 1983 /* bcol = bs*(*aj); */ 1984 for (kcol=0; kcol<bs; kcol++){ 1985 for (krow=0; krow<bs; krow++){ 1986 atmp = PetscAbsScalar(*aa); aa++; 1987 row = brow + krow; /* row index */ 1988 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1989 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1990 } 1991 } 1992 aj++; 1993 } 1994 } 1995 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2001 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2002 { 2003 PetscErrorCode ierr; 2004 2005 PetscFunctionBegin; 2006 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2012 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2013 { 2014 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2015 PetscFunctionBegin; 2016 *array = a->a; 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2022 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2023 { 2024 PetscFunctionBegin; 2025 PetscFunctionReturn(0); 2026 } 2027 2028 #include "petscblaslapack.h" 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2031 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 2032 { 2033 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2034 PetscErrorCode ierr; 2035 PetscInt i,bs=Y->bs,j,bs2; 2036 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 2037 2038 PetscFunctionBegin; 2039 if (str == SAME_NONZERO_PATTERN) { 2040 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2041 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2042 if (y->xtoy && y->XtoY != X) { 2043 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2044 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2045 } 2046 if (!y->xtoy) { /* get xtoy */ 2047 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2048 y->XtoY = X; 2049 } 2050 bs2 = bs*bs; 2051 for (i=0; i<x->nz; i++) { 2052 j = 0; 2053 while (j < bs2){ 2054 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2055 j++; 2056 } 2057 } 2058 ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2059 } else { 2060 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2061 } 2062 PetscFunctionReturn(0); 2063 } 2064 2065 /* -------------------------------------------------------------------*/ 2066 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2067 MatGetRow_SeqBAIJ, 2068 MatRestoreRow_SeqBAIJ, 2069 MatMult_SeqBAIJ_N, 2070 /* 4*/ MatMultAdd_SeqBAIJ_N, 2071 MatMultTranspose_SeqBAIJ, 2072 MatMultTransposeAdd_SeqBAIJ, 2073 MatSolve_SeqBAIJ_N, 2074 0, 2075 0, 2076 /*10*/ 0, 2077 MatLUFactor_SeqBAIJ, 2078 0, 2079 0, 2080 MatTranspose_SeqBAIJ, 2081 /*15*/ MatGetInfo_SeqBAIJ, 2082 MatEqual_SeqBAIJ, 2083 MatGetDiagonal_SeqBAIJ, 2084 MatDiagonalScale_SeqBAIJ, 2085 MatNorm_SeqBAIJ, 2086 /*20*/ 0, 2087 MatAssemblyEnd_SeqBAIJ, 2088 0, 2089 MatSetOption_SeqBAIJ, 2090 MatZeroEntries_SeqBAIJ, 2091 /*25*/ MatZeroRows_SeqBAIJ, 2092 MatLUFactorSymbolic_SeqBAIJ, 2093 MatLUFactorNumeric_SeqBAIJ_N, 2094 MatCholeskyFactorSymbolic_SeqBAIJ, 2095 MatCholeskyFactorNumeric_SeqBAIJ_N, 2096 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2097 MatILUFactorSymbolic_SeqBAIJ, 2098 MatICCFactorSymbolic_SeqBAIJ, 2099 MatGetArray_SeqBAIJ, 2100 MatRestoreArray_SeqBAIJ, 2101 /*35*/ MatDuplicate_SeqBAIJ, 2102 0, 2103 0, 2104 MatILUFactor_SeqBAIJ, 2105 0, 2106 /*40*/ MatAXPY_SeqBAIJ, 2107 MatGetSubMatrices_SeqBAIJ, 2108 MatIncreaseOverlap_SeqBAIJ, 2109 MatGetValues_SeqBAIJ, 2110 0, 2111 /*45*/ MatPrintHelp_SeqBAIJ, 2112 MatScale_SeqBAIJ, 2113 0, 2114 0, 2115 0, 2116 /*50*/ 0, 2117 MatGetRowIJ_SeqBAIJ, 2118 MatRestoreRowIJ_SeqBAIJ, 2119 0, 2120 0, 2121 /*55*/ 0, 2122 0, 2123 0, 2124 0, 2125 MatSetValuesBlocked_SeqBAIJ, 2126 /*60*/ MatGetSubMatrix_SeqBAIJ, 2127 MatDestroy_SeqBAIJ, 2128 MatView_SeqBAIJ, 2129 MatGetPetscMaps_Petsc, 2130 0, 2131 /*65*/ 0, 2132 0, 2133 0, 2134 0, 2135 0, 2136 /*70*/ MatGetRowMax_SeqBAIJ, 2137 MatConvert_Basic, 2138 0, 2139 0, 2140 0, 2141 /*75*/ 0, 2142 0, 2143 0, 2144 0, 2145 0, 2146 /*80*/ 0, 2147 0, 2148 0, 2149 0, 2150 MatLoad_SeqBAIJ, 2151 /*85*/ 0, 2152 0, 2153 0, 2154 0, 2155 0, 2156 /*90*/ 0, 2157 0, 2158 0, 2159 0, 2160 0, 2161 /*95*/ 0, 2162 0, 2163 0, 2164 0}; 2165 2166 EXTERN_C_BEGIN 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2169 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2170 { 2171 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2172 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 if (aij->nonew != 1) { 2177 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2178 } 2179 2180 /* allocate space for values if not already there */ 2181 if (!aij->saved_values) { 2182 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2183 } 2184 2185 /* copy values over */ 2186 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 EXTERN_C_END 2190 2191 EXTERN_C_BEGIN 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2194 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2195 { 2196 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2197 PetscErrorCode ierr; 2198 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2199 2200 PetscFunctionBegin; 2201 if (aij->nonew != 1) { 2202 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2203 } 2204 if (!aij->saved_values) { 2205 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2206 } 2207 2208 /* copy values over */ 2209 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 EXTERN_C_END 2213 2214 EXTERN_C_BEGIN 2215 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 2216 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*); 2217 EXTERN_C_END 2218 2219 EXTERN_C_BEGIN 2220 #undef __FUNCT__ 2221 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2222 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2223 { 2224 Mat_SeqBAIJ *b; 2225 PetscErrorCode ierr; 2226 PetscInt i,mbs,nbs,bs2,newbs = bs; 2227 PetscTruth flg; 2228 2229 PetscFunctionBegin; 2230 2231 B->preallocated = PETSC_TRUE; 2232 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2233 if (nnz && newbs != bs) { 2234 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2235 } 2236 bs = newbs; 2237 2238 mbs = B->m/bs; 2239 nbs = B->n/bs; 2240 bs2 = bs*bs; 2241 2242 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2243 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2244 } 2245 2246 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2247 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2248 if (nnz) { 2249 for (i=0; i<mbs; i++) { 2250 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2251 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2252 } 2253 } 2254 2255 b = (Mat_SeqBAIJ*)B->data; 2256 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2257 B->ops->solve = MatSolve_SeqBAIJ_Update; 2258 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2259 if (!flg) { 2260 switch (bs) { 2261 case 1: 2262 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2263 B->ops->mult = MatMult_SeqBAIJ_1; 2264 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2265 break; 2266 case 2: 2267 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2268 B->ops->mult = MatMult_SeqBAIJ_2; 2269 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2270 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2271 break; 2272 case 3: 2273 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2274 B->ops->mult = MatMult_SeqBAIJ_3; 2275 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2276 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2277 break; 2278 case 4: 2279 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2280 B->ops->mult = MatMult_SeqBAIJ_4; 2281 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2282 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2283 break; 2284 case 5: 2285 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2286 B->ops->mult = MatMult_SeqBAIJ_5; 2287 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2288 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2289 break; 2290 case 6: 2291 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2292 B->ops->mult = MatMult_SeqBAIJ_6; 2293 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2294 break; 2295 case 7: 2296 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2297 B->ops->mult = MatMult_SeqBAIJ_7; 2298 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2299 break; 2300 default: 2301 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2302 B->ops->mult = MatMult_SeqBAIJ_N; 2303 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2304 break; 2305 } 2306 } 2307 B->bs = bs; 2308 b->mbs = mbs; 2309 b->nbs = nbs; 2310 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2311 if (!nnz) { 2312 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2313 else if (nz <= 0) nz = 1; 2314 for (i=0; i<mbs; i++) b->imax[i] = nz; 2315 nz = nz*mbs; 2316 } else { 2317 nz = 0; 2318 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2319 } 2320 2321 /* allocate the matrix space */ 2322 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 2323 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2324 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2325 b->singlemalloc = PETSC_TRUE; 2326 2327 b->i[0] = 0; 2328 for (i=1; i<mbs+1; i++) { 2329 b->i[i] = b->i[i-1] + b->imax[i-1]; 2330 } 2331 2332 /* b->ilen will count nonzeros in each block row so far. */ 2333 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2334 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2335 2336 B->bs = bs; 2337 b->bs2 = bs2; 2338 b->mbs = mbs; 2339 b->nz = 0; 2340 b->maxnz = nz*bs2; 2341 B->info.nz_unneeded = (PetscReal)b->maxnz; 2342 PetscFunctionReturn(0); 2343 } 2344 EXTERN_C_END 2345 2346 /*MC 2347 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2348 block sparse compressed row format. 2349 2350 Options Database Keys: 2351 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2352 2353 Level: beginner 2354 2355 .seealso: MatCreateSeqBAIJ 2356 M*/ 2357 2358 EXTERN_C_BEGIN 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "MatCreate_SeqBAIJ" 2361 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2362 { 2363 PetscErrorCode ierr; 2364 PetscMPIInt size; 2365 Mat_SeqBAIJ *b; 2366 2367 PetscFunctionBegin; 2368 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2369 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2370 2371 B->m = B->M = PetscMax(B->m,B->M); 2372 B->n = B->N = PetscMax(B->n,B->N); 2373 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2374 B->data = (void*)b; 2375 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2376 B->factor = 0; 2377 B->mapping = 0; 2378 b->row = 0; 2379 b->col = 0; 2380 b->icol = 0; 2381 b->reallocs = 0; 2382 b->saved_values = 0; 2383 #if defined(PETSC_USE_MAT_SINGLE) 2384 b->setvalueslen = 0; 2385 b->setvaluescopy = PETSC_NULL; 2386 #endif 2387 2388 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2389 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2390 2391 b->sorted = PETSC_FALSE; 2392 b->roworiented = PETSC_TRUE; 2393 b->nonew = 0; 2394 b->diag = 0; 2395 b->solve_work = 0; 2396 b->mult_work = 0; 2397 B->spptr = 0; 2398 B->info.nz_unneeded = (PetscReal)b->maxnz; 2399 b->keepzeroedrows = PETSC_FALSE; 2400 b->xtoy = 0; 2401 b->XtoY = 0; 2402 b->compressedrow.use = PETSC_FALSE; 2403 b->compressedrow.nrows = 0; 2404 b->compressedrow.i = PETSC_NULL; 2405 b->compressedrow.rindex = PETSC_NULL; 2406 b->compressedrow.checked = PETSC_FALSE; 2407 B->same_nonzero = PETSC_FALSE; 2408 2409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2410 "MatInvertBlockDiagonal_SeqBAIJ", 2411 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2413 "MatStoreValues_SeqBAIJ", 2414 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2416 "MatRetrieveValues_SeqBAIJ", 2417 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2418 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2419 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2420 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2421 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2422 "MatConvert_SeqBAIJ_SeqAIJ", 2423 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2424 #if !defined(PETSC_USE_64BIT_INT) 2425 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2426 "MatConvert_SeqBAIJ_SeqSBAIJ", 2427 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2428 #endif 2429 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2430 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2431 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 EXTERN_C_END 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2438 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2439 { 2440 Mat C; 2441 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2442 PetscErrorCode ierr; 2443 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2444 2445 PetscFunctionBegin; 2446 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2447 2448 *B = 0; 2449 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2450 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2451 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2452 c = (Mat_SeqBAIJ*)C->data; 2453 2454 C->M = A->M; 2455 C->N = A->N; 2456 C->bs = A->bs; 2457 c->bs2 = a->bs2; 2458 c->mbs = a->mbs; 2459 c->nbs = a->nbs; 2460 2461 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2462 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2463 for (i=0; i<mbs; i++) { 2464 c->imax[i] = a->imax[i]; 2465 c->ilen[i] = a->ilen[i]; 2466 } 2467 2468 /* allocate the matrix space */ 2469 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2470 c->singlemalloc = PETSC_TRUE; 2471 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2472 if (mbs > 0) { 2473 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2474 if (cpvalues == MAT_COPY_VALUES) { 2475 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2476 } else { 2477 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2478 } 2479 } 2480 c->sorted = a->sorted; 2481 c->roworiented = a->roworiented; 2482 c->nonew = a->nonew; 2483 2484 if (a->diag) { 2485 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2486 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2487 for (i=0; i<mbs; i++) { 2488 c->diag[i] = a->diag[i]; 2489 } 2490 } else c->diag = 0; 2491 c->nz = a->nz; 2492 c->maxnz = a->maxnz; 2493 c->solve_work = 0; 2494 c->mult_work = 0; 2495 C->preallocated = PETSC_TRUE; 2496 C->assembled = PETSC_TRUE; 2497 2498 c->compressedrow.use = a->compressedrow.use; 2499 c->compressedrow.nrows = a->compressedrow.nrows; 2500 c->compressedrow.checked = a->compressedrow.checked; 2501 if ( a->compressedrow.checked && a->compressedrow.use){ 2502 i = a->compressedrow.nrows; 2503 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2504 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2505 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2506 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2507 } else { 2508 c->compressedrow.use = PETSC_FALSE; 2509 c->compressedrow.i = PETSC_NULL; 2510 c->compressedrow.rindex = PETSC_NULL; 2511 } 2512 C->same_nonzero = A->same_nonzero; 2513 *B = C; 2514 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "MatLoad_SeqBAIJ" 2520 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2521 { 2522 Mat_SeqBAIJ *a; 2523 Mat B; 2524 PetscErrorCode ierr; 2525 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2526 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2527 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2528 PetscInt *masked,nmask,tmp,bs2,ishift; 2529 PetscMPIInt size; 2530 int fd; 2531 PetscScalar *aa; 2532 MPI_Comm comm = ((PetscObject)viewer)->comm; 2533 2534 PetscFunctionBegin; 2535 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2536 bs2 = bs*bs; 2537 2538 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2539 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2540 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2541 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2542 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2543 M = header[1]; N = header[2]; nz = header[3]; 2544 2545 if (header[3] < 0) { 2546 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2547 } 2548 2549 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2550 2551 /* 2552 This code adds extra rows to make sure the number of rows is 2553 divisible by the blocksize 2554 */ 2555 mbs = M/bs; 2556 extra_rows = bs - M + bs*(mbs); 2557 if (extra_rows == bs) extra_rows = 0; 2558 else mbs++; 2559 if (extra_rows) { 2560 ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2561 } 2562 2563 /* read in row lengths */ 2564 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2565 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2566 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2567 2568 /* read in column indices */ 2569 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2570 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2571 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2572 2573 /* loop over row lengths determining block row lengths */ 2574 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2575 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2576 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2577 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2578 masked = mask + mbs; 2579 rowcount = 0; nzcount = 0; 2580 for (i=0; i<mbs; i++) { 2581 nmask = 0; 2582 for (j=0; j<bs; j++) { 2583 kmax = rowlengths[rowcount]; 2584 for (k=0; k<kmax; k++) { 2585 tmp = jj[nzcount++]/bs; 2586 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2587 } 2588 rowcount++; 2589 } 2590 browlengths[i] += nmask; 2591 /* zero out the mask elements we set */ 2592 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2593 } 2594 2595 /* create our matrix */ 2596 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2597 ierr = MatSetType(B,type);CHKERRQ(ierr); 2598 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2599 a = (Mat_SeqBAIJ*)B->data; 2600 2601 /* set matrix "i" values */ 2602 a->i[0] = 0; 2603 for (i=1; i<= mbs; i++) { 2604 a->i[i] = a->i[i-1] + browlengths[i-1]; 2605 a->ilen[i-1] = browlengths[i-1]; 2606 } 2607 a->nz = 0; 2608 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2609 2610 /* read in nonzero values */ 2611 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2612 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2613 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2614 2615 /* set "a" and "j" values into matrix */ 2616 nzcount = 0; jcount = 0; 2617 for (i=0; i<mbs; i++) { 2618 nzcountb = nzcount; 2619 nmask = 0; 2620 for (j=0; j<bs; j++) { 2621 kmax = rowlengths[i*bs+j]; 2622 for (k=0; k<kmax; k++) { 2623 tmp = jj[nzcount++]/bs; 2624 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2625 } 2626 } 2627 /* sort the masked values */ 2628 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2629 2630 /* set "j" values into matrix */ 2631 maskcount = 1; 2632 for (j=0; j<nmask; j++) { 2633 a->j[jcount++] = masked[j]; 2634 mask[masked[j]] = maskcount++; 2635 } 2636 /* set "a" values into matrix */ 2637 ishift = bs2*a->i[i]; 2638 for (j=0; j<bs; j++) { 2639 kmax = rowlengths[i*bs+j]; 2640 for (k=0; k<kmax; k++) { 2641 tmp = jj[nzcountb]/bs ; 2642 block = mask[tmp] - 1; 2643 point = jj[nzcountb] - bs*tmp; 2644 idx = ishift + bs2*block + j + bs*point; 2645 a->a[idx] = (MatScalar)aa[nzcountb++]; 2646 } 2647 } 2648 /* zero out the mask elements we set */ 2649 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2650 } 2651 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2652 2653 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2654 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2655 ierr = PetscFree(aa);CHKERRQ(ierr); 2656 ierr = PetscFree(jj);CHKERRQ(ierr); 2657 ierr = PetscFree(mask);CHKERRQ(ierr); 2658 2659 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2660 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2661 ierr = MatView_Private(B);CHKERRQ(ierr); 2662 2663 *A = B; 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatCreateSeqBAIJ" 2669 /*@C 2670 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2671 compressed row) format. For good matrix assembly performance the 2672 user should preallocate the matrix storage by setting the parameter nz 2673 (or the array nnz). By setting these parameters accurately, performance 2674 during matrix assembly can be increased by more than a factor of 50. 2675 2676 Collective on MPI_Comm 2677 2678 Input Parameters: 2679 + comm - MPI communicator, set to PETSC_COMM_SELF 2680 . bs - size of block 2681 . m - number of rows 2682 . n - number of columns 2683 . nz - number of nonzero blocks per block row (same for all rows) 2684 - nnz - array containing the number of nonzero blocks in the various block rows 2685 (possibly different for each block row) or PETSC_NULL 2686 2687 Output Parameter: 2688 . A - the matrix 2689 2690 Options Database Keys: 2691 . -mat_no_unroll - uses code that does not unroll the loops in the 2692 block calculations (much slower) 2693 . -mat_block_size - size of the blocks to use 2694 2695 Level: intermediate 2696 2697 Notes: 2698 The number of rows and columns must be divisible by blocksize. 2699 2700 If the nnz parameter is given then the nz parameter is ignored 2701 2702 A nonzero block is any block that as 1 or more nonzeros in it 2703 2704 The block AIJ format is fully compatible with standard Fortran 77 2705 storage. That is, the stored row and column indices can begin at 2706 either one (as in Fortran) or zero. See the users' manual for details. 2707 2708 Specify the preallocated storage with either nz or nnz (not both). 2709 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2710 allocation. For additional details, see the users manual chapter on 2711 matrices. 2712 2713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2714 @*/ 2715 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2716 { 2717 PetscErrorCode ierr; 2718 2719 PetscFunctionBegin; 2720 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2721 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2722 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2723 PetscFunctionReturn(0); 2724 } 2725 2726 #undef __FUNCT__ 2727 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2728 /*@C 2729 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2730 per row in the matrix. For good matrix assembly performance the 2731 user should preallocate the matrix storage by setting the parameter nz 2732 (or the array nnz). By setting these parameters accurately, performance 2733 during matrix assembly can be increased by more than a factor of 50. 2734 2735 Collective on MPI_Comm 2736 2737 Input Parameters: 2738 + A - the matrix 2739 . bs - size of block 2740 . nz - number of block nonzeros per block row (same for all rows) 2741 - nnz - array containing the number of block nonzeros in the various block rows 2742 (possibly different for each block row) or PETSC_NULL 2743 2744 Options Database Keys: 2745 . -mat_no_unroll - uses code that does not unroll the loops in the 2746 block calculations (much slower) 2747 . -mat_block_size - size of the blocks to use 2748 2749 Level: intermediate 2750 2751 Notes: 2752 If the nnz parameter is given then the nz parameter is ignored 2753 2754 The block AIJ format is fully compatible with standard Fortran 77 2755 storage. That is, the stored row and column indices can begin at 2756 either one (as in Fortran) or zero. See the users' manual for details. 2757 2758 Specify the preallocated storage with either nz or nnz (not both). 2759 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2760 allocation. For additional details, see the users manual chapter on 2761 matrices. 2762 2763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2764 @*/ 2765 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2766 { 2767 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2768 2769 PetscFunctionBegin; 2770 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2771 if (f) { 2772 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2773 } 2774 PetscFunctionReturn(0); 2775 } 2776 2777