1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for Fun3d sequential benchmark 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval,lastcol = -1; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 stepval = (n-1)*4; 590 for (k=0; k<m; k++) { /* loop over added rows */ 591 row = im[k]; 592 rp = aj + ai[row]; 593 ap = aa + 16*ai[row]; 594 nrow = ailen[row]; 595 low = 0; 596 high = nrow; 597 for (l=0; l<n; l++) { /* loop over added columns */ 598 col = in[l]; 599 if (col <= lastcol) low = 0; else high = nrow; 600 lastcol = col; 601 value = v + k*(stepval+4)*4 + l*4; 602 while (high-low > 7) { 603 t = (low+high)/2; 604 if (rp[t] > col) high = t; 605 else low = t; 606 } 607 for (i=low; i<high; i++) { 608 if (rp[i] > col) break; 609 if (rp[i] == col) { 610 bap = ap + 16*i; 611 for (ii=0; ii<4; ii++,value+=stepval) { 612 for (jj=ii; jj<16; jj+=4) { 613 bap[jj] += *value++; 614 } 615 } 616 goto noinsert2; 617 } 618 } 619 N = nrow++ - 1; 620 high++; /* added new column index thus must search to one higher than before */ 621 /* shift up all the later entries in this row */ 622 for (ii=N; ii>=i; ii--) { 623 rp[ii+1] = rp[ii]; 624 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 625 } 626 if (N >= i) { 627 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 628 } 629 rp[i] = col; 630 bap = ap + 16*i; 631 for (ii=0; ii<4; ii++,value+=stepval) { 632 for (jj=ii; jj<16; jj+=4) { 633 bap[jj] = *value++; 634 } 635 } 636 noinsert2:; 637 low = i; 638 } 639 ailen[row] = nrow; 640 } 641 PetscFunctionReturnVoid(); 642 } 643 EXTERN_C_END 644 645 #if defined(PETSC_HAVE_FORTRAN_CAPS) 646 #define matsetvalues4_ MATSETVALUES4 647 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 648 #define matsetvalues4_ matsetvalues4 649 #endif 650 651 EXTERN_C_BEGIN 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatSetValues4_" 654 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 655 { 656 Mat A = *AA; 657 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 658 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 659 PetscInt *ai=a->i,*ailen=a->ilen; 660 PetscInt *aj=a->j,brow,bcol; 661 PetscInt ridx,cidx,lastcol = -1; 662 MatScalar *ap,value,*aa=a->a,*bap; 663 664 PetscFunctionBegin; 665 for (k=0; k<m; k++) { /* loop over added rows */ 666 row = im[k]; brow = row/4; 667 rp = aj + ai[brow]; 668 ap = aa + 16*ai[brow]; 669 nrow = ailen[brow]; 670 low = 0; 671 high = nrow; 672 for (l=0; l<n; l++) { /* loop over added columns */ 673 col = in[l]; bcol = col/4; 674 ridx = row % 4; cidx = col % 4; 675 value = v[l + k*n]; 676 if (col <= lastcol) low = 0; else high = nrow; 677 lastcol = col; 678 while (high-low > 7) { 679 t = (low+high)/2; 680 if (rp[t] > bcol) high = t; 681 else low = t; 682 } 683 for (i=low; i<high; i++) { 684 if (rp[i] > bcol) break; 685 if (rp[i] == bcol) { 686 bap = ap + 16*i + 4*cidx + ridx; 687 *bap += value; 688 goto noinsert1; 689 } 690 } 691 N = nrow++ - 1; 692 high++; /* added new column thus must search to one higher than before */ 693 /* shift up all the later entries in this row */ 694 for (ii=N; ii>=i; ii--) { 695 rp[ii+1] = rp[ii]; 696 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 697 } 698 if (N>=i) { 699 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 700 } 701 rp[i] = bcol; 702 ap[16*i + 4*cidx + ridx] = value; 703 noinsert1:; 704 low = i; 705 } 706 ailen[brow] = nrow; 707 } 708 PetscFunctionReturnVoid(); 709 } 710 EXTERN_C_END 711 712 /* UGLY, ugly, ugly 713 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 714 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 715 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 716 converts the entries into single precision and then calls ..._MatScalar() to put them 717 into the single precision data structures. 718 */ 719 #if defined(PETSC_USE_MAT_SINGLE) 720 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 721 #else 722 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 723 #endif 724 725 #define CHUNKSIZE 10 726 727 /* 728 Checks for missing diagonals 729 */ 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 732 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 733 { 734 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 735 PetscErrorCode ierr; 736 PetscInt *diag,*jj = a->j,i; 737 738 PetscFunctionBegin; 739 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 740 diag = a->diag; 741 for (i=0; i<a->mbs; i++) { 742 if (jj[diag[i]] != i) { 743 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 744 } 745 } 746 PetscFunctionReturn(0); 747 } 748 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 751 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 752 { 753 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 754 PetscErrorCode ierr; 755 PetscInt i,j,*diag,m = a->mbs; 756 757 PetscFunctionBegin; 758 if (a->diag) PetscFunctionReturn(0); 759 760 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 761 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 762 for (i=0; i<m; i++) { 763 diag[i] = a->i[i+1]; 764 for (j=a->i[i]; j<a->i[i+1]; j++) { 765 if (a->j[j] == i) { 766 diag[i] = j; 767 break; 768 } 769 } 770 } 771 a->diag = diag; 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt n = a->mbs,i; 785 786 PetscFunctionBegin; 787 *nn = n; 788 if (!ia) PetscFunctionReturn(0); 789 if (symmetric) { 790 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 791 } else if (oshift == 1) { 792 /* temporarily add 1 to i and j indices */ 793 PetscInt nz = a->i[n]; 794 for (i=0; i<nz; i++) a->j[i]++; 795 for (i=0; i<n+1; i++) a->i[i]++; 796 *ia = a->i; *ja = a->j; 797 } else { 798 *ia = a->i; *ja = a->j; 799 } 800 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscErrorCode ierr; 810 PetscInt i,n = a->mbs; 811 812 PetscFunctionBegin; 813 if (!ia) PetscFunctionReturn(0); 814 if (symmetric) { 815 ierr = PetscFree(*ia);CHKERRQ(ierr); 816 ierr = PetscFree(*ja);CHKERRQ(ierr); 817 } else if (oshift == 1) { 818 PetscInt nz = a->i[n]-1; 819 for (i=0; i<nz; i++) a->j[i]--; 820 for (i=0; i<n+1; i++) a->i[i]--; 821 } 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatDestroy_SeqBAIJ" 827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 #if defined(PETSC_USE_LOG) 834 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 835 #endif 836 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 837 if (a->row) { 838 ierr = ISDestroy(a->row);CHKERRQ(ierr); 839 } 840 if (a->col) { 841 ierr = ISDestroy(a->col);CHKERRQ(ierr); 842 } 843 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 844 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 845 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 846 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 847 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 848 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 849 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 850 #if defined(PETSC_USE_MAT_SINGLE) 851 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 852 #endif 853 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 854 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 855 856 ierr = PetscFree(a);CHKERRQ(ierr); 857 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatSetOption_SeqBAIJ" 870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 871 { 872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 switch (op) { 877 case MAT_ROW_ORIENTED: 878 a->roworiented = PETSC_TRUE; 879 break; 880 case MAT_COLUMN_ORIENTED: 881 a->roworiented = PETSC_FALSE; 882 break; 883 case MAT_COLUMNS_SORTED: 884 a->sorted = PETSC_TRUE; 885 break; 886 case MAT_COLUMNS_UNSORTED: 887 a->sorted = PETSC_FALSE; 888 break; 889 case MAT_KEEP_ZEROED_ROWS: 890 a->keepzeroedrows = PETSC_TRUE; 891 break; 892 case MAT_NO_NEW_NONZERO_LOCATIONS: 893 a->nonew = 1; 894 break; 895 case MAT_NEW_NONZERO_LOCATION_ERR: 896 a->nonew = -1; 897 break; 898 case MAT_NEW_NONZERO_ALLOCATION_ERR: 899 a->nonew = -2; 900 break; 901 case MAT_YES_NEW_NONZERO_LOCATIONS: 902 a->nonew = 0; 903 break; 904 case MAT_ROWS_SORTED: 905 case MAT_ROWS_UNSORTED: 906 case MAT_YES_NEW_DIAGONALS: 907 case MAT_IGNORE_OFF_PROC_ENTRIES: 908 case MAT_USE_HASH_TABLE: 909 ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 910 break; 911 case MAT_NO_NEW_DIAGONALS: 912 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 913 case MAT_SYMMETRIC: 914 case MAT_STRUCTURALLY_SYMMETRIC: 915 case MAT_NOT_SYMMETRIC: 916 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 917 case MAT_HERMITIAN: 918 case MAT_NOT_HERMITIAN: 919 case MAT_SYMMETRY_ETERNAL: 920 case MAT_NOT_SYMMETRY_ETERNAL: 921 break; 922 default: 923 SETERRQ(PETSC_ERR_SUP,"unknown option"); 924 } 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatGetRow_SeqBAIJ" 930 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 931 { 932 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 933 PetscErrorCode ierr; 934 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 935 MatScalar *aa,*aa_i; 936 PetscScalar *v_i; 937 938 PetscFunctionBegin; 939 bs = A->bs; 940 ai = a->i; 941 aj = a->j; 942 aa = a->a; 943 bs2 = a->bs2; 944 945 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 946 947 bn = row/bs; /* Block number */ 948 bp = row % bs; /* Block Position */ 949 M = ai[bn+1] - ai[bn]; 950 *nz = bs*M; 951 952 if (v) { 953 *v = 0; 954 if (*nz) { 955 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 956 for (i=0; i<M; i++) { /* for each block in the block row */ 957 v_i = *v + i*bs; 958 aa_i = aa + bs2*(ai[bn] + i); 959 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 960 } 961 } 962 } 963 964 if (idx) { 965 *idx = 0; 966 if (*nz) { 967 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 968 for (i=0; i<M; i++) { /* for each block in the block row */ 969 idx_i = *idx + i*bs; 970 itmp = bs*aj[ai[bn] + i]; 971 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 972 } 973 } 974 } 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 980 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 981 { 982 PetscErrorCode ierr; 983 984 PetscFunctionBegin; 985 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 986 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatTranspose_SeqBAIJ" 992 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 993 { 994 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 995 Mat C; 996 PetscErrorCode ierr; 997 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 998 PetscInt *rows,*cols,bs2=a->bs2; 999 PetscScalar *array; 1000 1001 PetscFunctionBegin; 1002 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1003 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1004 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1005 1006 #if defined(PETSC_USE_MAT_SINGLE) 1007 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1008 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1009 #else 1010 array = a->a; 1011 #endif 1012 1013 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1014 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1015 ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr); 1016 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1017 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1018 ierr = PetscFree(col);CHKERRQ(ierr); 1019 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1020 cols = rows + bs; 1021 for (i=0; i<mbs; i++) { 1022 cols[0] = i*bs; 1023 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1024 len = ai[i+1] - ai[i]; 1025 for (j=0; j<len; j++) { 1026 rows[0] = (*aj++)*bs; 1027 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1028 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1029 array += bs2; 1030 } 1031 } 1032 ierr = PetscFree(rows);CHKERRQ(ierr); 1033 #if defined(PETSC_USE_MAT_SINGLE) 1034 ierr = PetscFree(array); 1035 #endif 1036 1037 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 1040 if (B) { 1041 *B = C; 1042 } else { 1043 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1044 } 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1050 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1051 { 1052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1053 PetscErrorCode ierr; 1054 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1055 int fd; 1056 PetscScalar *aa; 1057 FILE *file; 1058 1059 PetscFunctionBegin; 1060 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1061 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1062 col_lens[0] = MAT_FILE_COOKIE; 1063 1064 col_lens[1] = A->m; 1065 col_lens[2] = A->n; 1066 col_lens[3] = a->nz*bs2; 1067 1068 /* store lengths of each row and write (including header) to file */ 1069 count = 0; 1070 for (i=0; i<a->mbs; i++) { 1071 for (j=0; j<bs; j++) { 1072 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1073 } 1074 } 1075 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1076 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1077 1078 /* store column indices (zero start index) */ 1079 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1080 count = 0; 1081 for (i=0; i<a->mbs; i++) { 1082 for (j=0; j<bs; j++) { 1083 for (k=a->i[i]; k<a->i[i+1]; k++) { 1084 for (l=0; l<bs; l++) { 1085 jj[count++] = bs*a->j[k] + l; 1086 } 1087 } 1088 } 1089 } 1090 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1091 ierr = PetscFree(jj);CHKERRQ(ierr); 1092 1093 /* store nonzero values */ 1094 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1095 count = 0; 1096 for (i=0; i<a->mbs; i++) { 1097 for (j=0; j<bs; j++) { 1098 for (k=a->i[i]; k<a->i[i+1]; k++) { 1099 for (l=0; l<bs; l++) { 1100 aa[count++] = a->a[bs2*k + l*bs + j]; 1101 } 1102 } 1103 } 1104 } 1105 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1106 ierr = PetscFree(aa);CHKERRQ(ierr); 1107 1108 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1109 if (file) { 1110 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1117 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1118 { 1119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1120 PetscErrorCode ierr; 1121 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1122 PetscViewerFormat format; 1123 1124 PetscFunctionBegin; 1125 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1126 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1127 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1128 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1129 Mat aij; 1130 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1131 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1132 ierr = MatDestroy(aij);CHKERRQ(ierr); 1133 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1134 PetscFunctionReturn(0); 1135 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1136 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1137 for (i=0; i<a->mbs; i++) { 1138 for (j=0; j<bs; j++) { 1139 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1140 for (k=a->i[i]; k<a->i[i+1]; k++) { 1141 for (l=0; l<bs; l++) { 1142 #if defined(PETSC_USE_COMPLEX) 1143 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1144 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1145 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1146 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1147 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1148 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1150 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1151 } 1152 #else 1153 if (a->a[bs2*k + l*bs + j] != 0.0) { 1154 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1155 } 1156 #endif 1157 } 1158 } 1159 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1160 } 1161 } 1162 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1163 } else { 1164 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1165 for (i=0; i<a->mbs; i++) { 1166 for (j=0; j<bs; j++) { 1167 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1168 for (k=a->i[i]; k<a->i[i+1]; k++) { 1169 for (l=0; l<bs; l++) { 1170 #if defined(PETSC_USE_COMPLEX) 1171 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1172 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1173 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1174 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1176 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1177 } else { 1178 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1179 } 1180 #else 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1182 #endif 1183 } 1184 } 1185 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1186 } 1187 } 1188 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1189 } 1190 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1196 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1197 { 1198 Mat A = (Mat) Aa; 1199 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1202 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1203 MatScalar *aa; 1204 PetscViewer viewer; 1205 1206 PetscFunctionBegin; 1207 1208 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1209 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1210 1211 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1212 1213 /* loop over matrix elements drawing boxes */ 1214 color = PETSC_DRAW_BLUE; 1215 for (i=0,row=0; i<mbs; i++,row+=bs) { 1216 for (j=a->i[i]; j<a->i[i+1]; j++) { 1217 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1218 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1219 aa = a->a + j*bs2; 1220 for (k=0; k<bs; k++) { 1221 for (l=0; l<bs; l++) { 1222 if (PetscRealPart(*aa++) >= 0.) continue; 1223 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1224 } 1225 } 1226 } 1227 } 1228 color = PETSC_DRAW_CYAN; 1229 for (i=0,row=0; i<mbs; i++,row+=bs) { 1230 for (j=a->i[i]; j<a->i[i+1]; j++) { 1231 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1232 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1233 aa = a->a + j*bs2; 1234 for (k=0; k<bs; k++) { 1235 for (l=0; l<bs; l++) { 1236 if (PetscRealPart(*aa++) != 0.) continue; 1237 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1238 } 1239 } 1240 } 1241 } 1242 1243 color = PETSC_DRAW_RED; 1244 for (i=0,row=0; i<mbs; i++,row+=bs) { 1245 for (j=a->i[i]; j<a->i[i+1]; j++) { 1246 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1247 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1248 aa = a->a + j*bs2; 1249 for (k=0; k<bs; k++) { 1250 for (l=0; l<bs; l++) { 1251 if (PetscRealPart(*aa++) <= 0.) continue; 1252 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1253 } 1254 } 1255 } 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1262 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1263 { 1264 PetscErrorCode ierr; 1265 PetscReal xl,yl,xr,yr,w,h; 1266 PetscDraw draw; 1267 PetscTruth isnull; 1268 1269 PetscFunctionBegin; 1270 1271 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1272 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1273 1274 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1275 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1276 xr += w; yr += h; xl = -w; yl = -h; 1277 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1278 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1279 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_SeqBAIJ" 1285 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1286 { 1287 PetscErrorCode ierr; 1288 PetscTruth iascii,isbinary,isdraw; 1289 1290 PetscFunctionBegin; 1291 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1292 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1294 if (iascii){ 1295 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1296 } else if (isbinary) { 1297 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1298 } else if (isdraw) { 1299 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1300 } else { 1301 Mat B; 1302 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1303 ierr = MatView(B,viewer);CHKERRQ(ierr); 1304 ierr = MatDestroy(B);CHKERRQ(ierr); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1312 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1316 PetscInt *ai = a->i,*ailen = a->ilen; 1317 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1318 MatScalar *ap,*aa = a->a,zero = 0.0; 1319 1320 PetscFunctionBegin; 1321 for (k=0; k<m; k++) { /* loop over rows */ 1322 row = im[k]; brow = row/bs; 1323 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1324 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1325 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1326 nrow = ailen[brow]; 1327 for (l=0; l<n; l++) { /* loop over columns */ 1328 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1329 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1330 col = in[l] ; 1331 bcol = col/bs; 1332 cidx = col%bs; 1333 ridx = row%bs; 1334 high = nrow; 1335 low = 0; /* assume unsorted */ 1336 while (high-low > 5) { 1337 t = (low+high)/2; 1338 if (rp[t] > bcol) high = t; 1339 else low = t; 1340 } 1341 for (i=low; i<high; i++) { 1342 if (rp[i] > bcol) break; 1343 if (rp[i] == bcol) { 1344 *v++ = ap[bs2*i+bs*cidx+ridx]; 1345 goto finished; 1346 } 1347 } 1348 *v++ = zero; 1349 finished:; 1350 } 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #if defined(PETSC_USE_MAT_SINGLE) 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1358 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1359 { 1360 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1361 PetscErrorCode ierr; 1362 PetscInt i,N = m*n*b->bs2; 1363 MatScalar *vsingle; 1364 1365 PetscFunctionBegin; 1366 if (N > b->setvalueslen) { 1367 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1368 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1369 b->setvalueslen = N; 1370 } 1371 vsingle = b->setvaluescopy; 1372 for (i=0; i<N; i++) { 1373 vsingle[i] = v[i]; 1374 } 1375 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 #endif 1379 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1383 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1384 { 1385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1386 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1387 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1388 PetscErrorCode ierr; 1389 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1390 PetscTruth roworiented=a->roworiented; 1391 const MatScalar *value = v; 1392 MatScalar *ap,*aa = a->a,*bap; 1393 1394 PetscFunctionBegin; 1395 if (roworiented) { 1396 stepval = (n-1)*bs; 1397 } else { 1398 stepval = (m-1)*bs; 1399 } 1400 for (k=0; k<m; k++) { /* loop over added rows */ 1401 row = im[k]; 1402 if (row < 0) continue; 1403 #if defined(PETSC_USE_DEBUG) 1404 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1405 #endif 1406 rp = aj + ai[row]; 1407 ap = aa + bs2*ai[row]; 1408 rmax = imax[row]; 1409 nrow = ailen[row]; 1410 low = 0; 1411 high = nrow; 1412 for (l=0; l<n; l++) { /* loop over added columns */ 1413 if (in[l] < 0) continue; 1414 #if defined(PETSC_USE_DEBUG) 1415 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1416 #endif 1417 col = in[l]; 1418 if (roworiented) { 1419 value = v + k*(stepval+bs)*bs + l*bs; 1420 } else { 1421 value = v + l*(stepval+bs)*bs + k*bs; 1422 } 1423 if (col <= lastcol) low = 0; else high = nrow; 1424 lastcol = col; 1425 while (high-low > 7) { 1426 t = (low+high)/2; 1427 if (rp[t] > col) high = t; 1428 else low = t; 1429 } 1430 for (i=low; i<high; i++) { 1431 if (rp[i] > col) break; 1432 if (rp[i] == col) { 1433 bap = ap + bs2*i; 1434 if (roworiented) { 1435 if (is == ADD_VALUES) { 1436 for (ii=0; ii<bs; ii++,value+=stepval) { 1437 for (jj=ii; jj<bs2; jj+=bs) { 1438 bap[jj] += *value++; 1439 } 1440 } 1441 } else { 1442 for (ii=0; ii<bs; ii++,value+=stepval) { 1443 for (jj=ii; jj<bs2; jj+=bs) { 1444 bap[jj] = *value++; 1445 } 1446 } 1447 } 1448 } else { 1449 if (is == ADD_VALUES) { 1450 for (ii=0; ii<bs; ii++,value+=stepval) { 1451 for (jj=0; jj<bs; jj++) { 1452 *bap++ += *value++; 1453 } 1454 } 1455 } else { 1456 for (ii=0; ii<bs; ii++,value+=stepval) { 1457 for (jj=0; jj<bs; jj++) { 1458 *bap++ = *value++; 1459 } 1460 } 1461 } 1462 } 1463 goto noinsert2; 1464 } 1465 } 1466 if (nonew == 1) goto noinsert2; 1467 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1468 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1469 N = nrow++ - 1; high++; 1470 /* shift up all the later entries in this row */ 1471 for (ii=N; ii>=i; ii--) { 1472 rp[ii+1] = rp[ii]; 1473 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1474 } 1475 if (N >= i) { 1476 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1477 } 1478 rp[i] = col; 1479 bap = ap + bs2*i; 1480 if (roworiented) { 1481 for (ii=0; ii<bs; ii++,value+=stepval) { 1482 for (jj=ii; jj<bs2; jj+=bs) { 1483 bap[jj] = *value++; 1484 } 1485 } 1486 } else { 1487 for (ii=0; ii<bs; ii++,value+=stepval) { 1488 for (jj=0; jj<bs; jj++) { 1489 *bap++ = *value++; 1490 } 1491 } 1492 } 1493 noinsert2:; 1494 low = i; 1495 } 1496 ailen[row] = nrow; 1497 } 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1503 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1504 { 1505 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1506 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1507 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1508 PetscErrorCode ierr; 1509 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1510 MatScalar *aa = a->a,*ap; 1511 PetscReal ratio=0.6; 1512 1513 PetscFunctionBegin; 1514 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1515 1516 if (m) rmax = ailen[0]; 1517 for (i=1; i<mbs; i++) { 1518 /* move each row back by the amount of empty slots (fshift) before it*/ 1519 fshift += imax[i-1] - ailen[i-1]; 1520 rmax = PetscMax(rmax,ailen[i]); 1521 if (fshift) { 1522 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1523 N = ailen[i]; 1524 for (j=0; j<N; j++) { 1525 ip[j-fshift] = ip[j]; 1526 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1527 } 1528 } 1529 ai[i] = ai[i-1] + ailen[i-1]; 1530 } 1531 if (mbs) { 1532 fshift += imax[mbs-1] - ailen[mbs-1]; 1533 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1534 } 1535 /* reset ilen and imax for each row */ 1536 for (i=0; i<mbs; i++) { 1537 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1538 } 1539 a->nz = ai[mbs]; 1540 1541 /* diagonals may have moved, so kill the diagonal pointers */ 1542 a->idiagvalid = PETSC_FALSE; 1543 if (fshift && a->diag) { 1544 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1545 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1546 a->diag = 0; 1547 } 1548 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1549 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1550 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1551 a->reallocs = 0; 1552 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1553 1554 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1555 if (a->compressedrow.use){ 1556 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1557 } 1558 1559 A->same_nonzero = PETSC_TRUE; 1560 PetscFunctionReturn(0); 1561 } 1562 1563 /* 1564 This function returns an array of flags which indicate the locations of contiguous 1565 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1566 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1567 Assume: sizes should be long enough to hold all the values. 1568 */ 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1571 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1572 { 1573 PetscInt i,j,k,row; 1574 PetscTruth flg; 1575 1576 PetscFunctionBegin; 1577 for (i=0,j=0; i<n; j++) { 1578 row = idx[i]; 1579 if (row%bs!=0) { /* Not the begining of a block */ 1580 sizes[j] = 1; 1581 i++; 1582 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1583 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1584 i++; 1585 } else { /* Begining of the block, so check if the complete block exists */ 1586 flg = PETSC_TRUE; 1587 for (k=1; k<bs; k++) { 1588 if (row+k != idx[i+k]) { /* break in the block */ 1589 flg = PETSC_FALSE; 1590 break; 1591 } 1592 } 1593 if (flg) { /* No break in the bs */ 1594 sizes[j] = bs; 1595 i+= bs; 1596 } else { 1597 sizes[j] = 1; 1598 i++; 1599 } 1600 } 1601 } 1602 *bs_max = j; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1608 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1609 { 1610 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1611 PetscErrorCode ierr; 1612 PetscInt i,j,k,count,*rows; 1613 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1614 PetscScalar zero = 0.0; 1615 MatScalar *aa; 1616 1617 PetscFunctionBegin; 1618 /* Make a copy of the IS and sort it */ 1619 /* allocate memory for rows,sizes */ 1620 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1621 sizes = rows + is_n; 1622 1623 /* copy IS values to rows, and sort them */ 1624 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1625 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1626 if (baij->keepzeroedrows) { 1627 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1628 bs_max = is_n; 1629 A->same_nonzero = PETSC_TRUE; 1630 } else { 1631 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1632 A->same_nonzero = PETSC_FALSE; 1633 } 1634 1635 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1636 row = rows[j]; 1637 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1638 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1639 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1640 if (sizes[i] == bs && !baij->keepzeroedrows) { 1641 if (diag != 0.0) { 1642 if (baij->ilen[row/bs] > 0) { 1643 baij->ilen[row/bs] = 1; 1644 baij->j[baij->i[row/bs]] = row/bs; 1645 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1646 } 1647 /* Now insert all the diagonal values for this bs */ 1648 for (k=0; k<bs; k++) { 1649 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1650 } 1651 } else { /* (diag == 0.0) */ 1652 baij->ilen[row/bs] = 0; 1653 } /* end (diag == 0.0) */ 1654 } else { /* (sizes[i] != bs) */ 1655 #if defined (PETSC_USE_DEBUG) 1656 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1657 #endif 1658 for (k=0; k<count; k++) { 1659 aa[0] = zero; 1660 aa += bs; 1661 } 1662 if (diag != 0.0) { 1663 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1664 } 1665 } 1666 } 1667 1668 ierr = PetscFree(rows);CHKERRQ(ierr); 1669 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1675 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1676 { 1677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1678 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1679 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1680 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1681 PetscErrorCode ierr; 1682 PetscInt ridx,cidx,bs2=a->bs2; 1683 PetscTruth roworiented=a->roworiented; 1684 MatScalar *ap,value,*aa=a->a,*bap; 1685 1686 PetscFunctionBegin; 1687 for (k=0; k<m; k++) { /* loop over added rows */ 1688 row = im[k]; 1689 brow = row/bs; 1690 if (row < 0) continue; 1691 #if defined(PETSC_USE_DEBUG) 1692 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1693 #endif 1694 rp = aj + ai[brow]; 1695 ap = aa + bs2*ai[brow]; 1696 rmax = imax[brow]; 1697 nrow = ailen[brow]; 1698 low = 0; 1699 high = nrow; 1700 for (l=0; l<n; l++) { /* loop over added columns */ 1701 if (in[l] < 0) continue; 1702 #if defined(PETSC_USE_DEBUG) 1703 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1704 #endif 1705 col = in[l]; bcol = col/bs; 1706 ridx = row % bs; cidx = col % bs; 1707 if (roworiented) { 1708 value = v[l + k*n]; 1709 } else { 1710 value = v[k + l*m]; 1711 } 1712 if (col <= lastcol) low = 0; else high = nrow; 1713 lastcol = col; 1714 while (high-low > 7) { 1715 t = (low+high)/2; 1716 if (rp[t] > bcol) high = t; 1717 else low = t; 1718 } 1719 for (i=low; i<high; i++) { 1720 if (rp[i] > bcol) break; 1721 if (rp[i] == bcol) { 1722 bap = ap + bs2*i + bs*cidx + ridx; 1723 if (is == ADD_VALUES) *bap += value; 1724 else *bap = value; 1725 goto noinsert1; 1726 } 1727 } 1728 if (nonew == 1) goto noinsert1; 1729 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1730 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1731 N = nrow++ - 1; high++; 1732 /* shift up all the later entries in this row */ 1733 for (ii=N; ii>=i; ii--) { 1734 rp[ii+1] = rp[ii]; 1735 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1736 } 1737 if (N>=i) { 1738 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1739 } 1740 rp[i] = bcol; 1741 ap[bs2*i + bs*cidx + ridx] = value; 1742 a->nz++; 1743 noinsert1:; 1744 low = i; 1745 } 1746 ailen[brow] = nrow; 1747 } 1748 A->same_nonzero = PETSC_FALSE; 1749 PetscFunctionReturn(0); 1750 } 1751 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1755 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1756 { 1757 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1758 Mat outA; 1759 PetscErrorCode ierr; 1760 PetscTruth row_identity,col_identity; 1761 1762 PetscFunctionBegin; 1763 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1764 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1765 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1766 if (!row_identity || !col_identity) { 1767 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1768 } 1769 1770 outA = inA; 1771 inA->factor = FACTOR_LU; 1772 1773 if (!a->diag) { 1774 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1775 } 1776 1777 a->row = row; 1778 a->col = col; 1779 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1780 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1781 1782 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1783 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1784 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1785 1786 /* 1787 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1788 for ILU(0) factorization with natural ordering 1789 */ 1790 if (inA->bs < 8) { 1791 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1792 } else { 1793 if (!a->solve_work) { 1794 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1795 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1796 } 1797 } 1798 1799 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1800 1801 PetscFunctionReturn(0); 1802 } 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1805 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1806 { 1807 static PetscTruth called = PETSC_FALSE; 1808 MPI_Comm comm = A->comm; 1809 PetscErrorCode ierr; 1810 1811 PetscFunctionBegin; 1812 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1813 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1814 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1815 PetscFunctionReturn(0); 1816 } 1817 1818 EXTERN_C_BEGIN 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1821 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1822 { 1823 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1824 PetscInt i,nz,nbs; 1825 1826 PetscFunctionBegin; 1827 nz = baij->maxnz/baij->bs2; 1828 nbs = baij->nbs; 1829 for (i=0; i<nz; i++) { 1830 baij->j[i] = indices[i]; 1831 } 1832 baij->nz = nz; 1833 for (i=0; i<nbs; i++) { 1834 baij->ilen[i] = baij->imax[i]; 1835 } 1836 1837 PetscFunctionReturn(0); 1838 } 1839 EXTERN_C_END 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1843 /*@ 1844 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1845 in the matrix. 1846 1847 Input Parameters: 1848 + mat - the SeqBAIJ matrix 1849 - indices - the column indices 1850 1851 Level: advanced 1852 1853 Notes: 1854 This can be called if you have precomputed the nonzero structure of the 1855 matrix and want to provide it to the matrix object to improve the performance 1856 of the MatSetValues() operation. 1857 1858 You MUST have set the correct numbers of nonzeros per row in the call to 1859 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1860 1861 MUST be called before any calls to MatSetValues(); 1862 1863 @*/ 1864 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1865 { 1866 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1867 1868 PetscFunctionBegin; 1869 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1870 PetscValidPointer(indices,2); 1871 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1872 if (f) { 1873 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1874 } else { 1875 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1876 } 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1882 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1883 { 1884 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1885 PetscErrorCode ierr; 1886 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1887 PetscReal atmp; 1888 PetscScalar *x,zero = 0.0; 1889 MatScalar *aa; 1890 PetscInt ncols,brow,krow,kcol; 1891 1892 PetscFunctionBegin; 1893 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1894 bs = A->bs; 1895 aa = a->a; 1896 ai = a->i; 1897 aj = a->j; 1898 mbs = a->mbs; 1899 1900 ierr = VecSet(v,zero);CHKERRQ(ierr); 1901 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1902 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1903 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1904 for (i=0; i<mbs; i++) { 1905 ncols = ai[1] - ai[0]; ai++; 1906 brow = bs*i; 1907 for (j=0; j<ncols; j++){ 1908 /* bcol = bs*(*aj); */ 1909 for (kcol=0; kcol<bs; kcol++){ 1910 for (krow=0; krow<bs; krow++){ 1911 atmp = PetscAbsScalar(*aa); aa++; 1912 row = brow + krow; /* row index */ 1913 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1914 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1915 } 1916 } 1917 aj++; 1918 } 1919 } 1920 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1921 PetscFunctionReturn(0); 1922 } 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatCopy_SeqBAIJ" 1926 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1927 { 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 /* If the two matrices have the same copy implementation, use fast copy. */ 1932 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1934 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1935 1936 if (a->i[A->m] != b->i[B->m]) { 1937 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1938 } 1939 ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr); 1940 } else { 1941 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1942 } 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1948 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1949 { 1950 PetscErrorCode ierr; 1951 1952 PetscFunctionBegin; 1953 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1959 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1960 { 1961 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1962 PetscFunctionBegin; 1963 *array = a->a; 1964 PetscFunctionReturn(0); 1965 } 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1969 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1970 { 1971 PetscFunctionBegin; 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #include "petscblaslapack.h" 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1978 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1979 { 1980 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1981 PetscErrorCode ierr; 1982 PetscInt i,bs=Y->bs,j,bs2; 1983 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1984 1985 PetscFunctionBegin; 1986 if (str == SAME_NONZERO_PATTERN) { 1987 PetscScalar alpha = a; 1988 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1989 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1990 if (y->xtoy && y->XtoY != X) { 1991 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1992 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1993 } 1994 if (!y->xtoy) { /* get xtoy */ 1995 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1996 y->XtoY = X; 1997 } 1998 bs2 = bs*bs; 1999 for (i=0; i<x->nz; i++) { 2000 j = 0; 2001 while (j < bs2){ 2002 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2003 j++; 2004 } 2005 } 2006 ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2007 } else { 2008 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 #undef __FUNCT__ 2014 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2015 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2016 { 2017 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2018 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2019 PetscScalar *aa = a->a; 2020 2021 PetscFunctionBegin; 2022 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2028 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2029 { 2030 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2031 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2032 PetscScalar *aa = a->a; 2033 2034 PetscFunctionBegin; 2035 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 2040 /* -------------------------------------------------------------------*/ 2041 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2042 MatGetRow_SeqBAIJ, 2043 MatRestoreRow_SeqBAIJ, 2044 MatMult_SeqBAIJ_N, 2045 /* 4*/ MatMultAdd_SeqBAIJ_N, 2046 MatMultTranspose_SeqBAIJ, 2047 MatMultTransposeAdd_SeqBAIJ, 2048 MatSolve_SeqBAIJ_N, 2049 0, 2050 0, 2051 /*10*/ 0, 2052 MatLUFactor_SeqBAIJ, 2053 0, 2054 0, 2055 MatTranspose_SeqBAIJ, 2056 /*15*/ MatGetInfo_SeqBAIJ, 2057 MatEqual_SeqBAIJ, 2058 MatGetDiagonal_SeqBAIJ, 2059 MatDiagonalScale_SeqBAIJ, 2060 MatNorm_SeqBAIJ, 2061 /*20*/ 0, 2062 MatAssemblyEnd_SeqBAIJ, 2063 0, 2064 MatSetOption_SeqBAIJ, 2065 MatZeroEntries_SeqBAIJ, 2066 /*25*/ MatZeroRows_SeqBAIJ, 2067 MatLUFactorSymbolic_SeqBAIJ, 2068 MatLUFactorNumeric_SeqBAIJ_N, 2069 MatCholeskyFactorSymbolic_SeqBAIJ, 2070 MatCholeskyFactorNumeric_SeqBAIJ_N, 2071 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2072 MatILUFactorSymbolic_SeqBAIJ, 2073 MatICCFactorSymbolic_SeqBAIJ, 2074 MatGetArray_SeqBAIJ, 2075 MatRestoreArray_SeqBAIJ, 2076 /*35*/ MatDuplicate_SeqBAIJ, 2077 0, 2078 0, 2079 MatILUFactor_SeqBAIJ, 2080 0, 2081 /*40*/ MatAXPY_SeqBAIJ, 2082 MatGetSubMatrices_SeqBAIJ, 2083 MatIncreaseOverlap_SeqBAIJ, 2084 MatGetValues_SeqBAIJ, 2085 MatCopy_SeqBAIJ, 2086 /*45*/ MatPrintHelp_SeqBAIJ, 2087 MatScale_SeqBAIJ, 2088 0, 2089 0, 2090 0, 2091 /*50*/ 0, 2092 MatGetRowIJ_SeqBAIJ, 2093 MatRestoreRowIJ_SeqBAIJ, 2094 0, 2095 0, 2096 /*55*/ 0, 2097 0, 2098 0, 2099 0, 2100 MatSetValuesBlocked_SeqBAIJ, 2101 /*60*/ MatGetSubMatrix_SeqBAIJ, 2102 MatDestroy_SeqBAIJ, 2103 MatView_SeqBAIJ, 2104 MatGetPetscMaps_Petsc, 2105 0, 2106 /*65*/ 0, 2107 0, 2108 0, 2109 0, 2110 0, 2111 /*70*/ MatGetRowMax_SeqBAIJ, 2112 MatConvert_Basic, 2113 0, 2114 0, 2115 0, 2116 /*75*/ 0, 2117 0, 2118 0, 2119 0, 2120 0, 2121 /*80*/ 0, 2122 0, 2123 0, 2124 0, 2125 MatLoad_SeqBAIJ, 2126 /*85*/ 0, 2127 0, 2128 0, 2129 0, 2130 0, 2131 /*90*/ 0, 2132 0, 2133 0, 2134 0, 2135 0, 2136 /*95*/ 0, 2137 0, 2138 0, 2139 0, 2140 0, 2141 /*100*/0, 2142 0, 2143 0, 2144 0, 2145 0, 2146 /*105*/0, 2147 MatRealPart_SeqBAIJ, 2148 MatImaginaryPart_SeqBAIJ 2149 }; 2150 2151 EXTERN_C_BEGIN 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2154 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2155 { 2156 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2157 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 if (aij->nonew != 1) { 2162 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2163 } 2164 2165 /* allocate space for values if not already there */ 2166 if (!aij->saved_values) { 2167 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2168 } 2169 2170 /* copy values over */ 2171 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2172 PetscFunctionReturn(0); 2173 } 2174 EXTERN_C_END 2175 2176 EXTERN_C_BEGIN 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2179 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2180 { 2181 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2182 PetscErrorCode ierr; 2183 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2184 2185 PetscFunctionBegin; 2186 if (aij->nonew != 1) { 2187 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2188 } 2189 if (!aij->saved_values) { 2190 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2191 } 2192 2193 /* copy values over */ 2194 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2195 PetscFunctionReturn(0); 2196 } 2197 EXTERN_C_END 2198 2199 EXTERN_C_BEGIN 2200 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2201 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2202 EXTERN_C_END 2203 2204 EXTERN_C_BEGIN 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2207 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2208 { 2209 Mat_SeqBAIJ *b; 2210 PetscErrorCode ierr; 2211 PetscInt i,mbs,nbs,bs2,newbs = bs; 2212 PetscTruth flg,skipallocation = PETSC_FALSE; 2213 2214 PetscFunctionBegin; 2215 2216 if (nz == MAT_SKIP_ALLOCATION) { 2217 skipallocation = PETSC_TRUE; 2218 nz = 0; 2219 } 2220 2221 B->preallocated = PETSC_TRUE; 2222 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2223 if (nnz && newbs != bs) { 2224 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2225 } 2226 bs = newbs; 2227 2228 mbs = B->m/bs; 2229 nbs = B->n/bs; 2230 bs2 = bs*bs; 2231 2232 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2233 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2234 } 2235 2236 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2237 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2238 if (nnz) { 2239 for (i=0; i<mbs; i++) { 2240 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2241 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2242 } 2243 } 2244 2245 b = (Mat_SeqBAIJ*)B->data; 2246 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2247 B->ops->solve = MatSolve_SeqBAIJ_Update; 2248 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2249 if (!flg) { 2250 switch (bs) { 2251 case 1: 2252 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2253 B->ops->mult = MatMult_SeqBAIJ_1; 2254 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2255 break; 2256 case 2: 2257 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2258 B->ops->mult = MatMult_SeqBAIJ_2; 2259 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2260 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2261 break; 2262 case 3: 2263 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2264 B->ops->mult = MatMult_SeqBAIJ_3; 2265 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2266 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2267 break; 2268 case 4: 2269 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2270 B->ops->mult = MatMult_SeqBAIJ_4; 2271 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2272 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2273 break; 2274 case 5: 2275 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2276 B->ops->mult = MatMult_SeqBAIJ_5; 2277 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2278 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2279 break; 2280 case 6: 2281 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2282 B->ops->mult = MatMult_SeqBAIJ_6; 2283 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2284 break; 2285 case 7: 2286 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2287 B->ops->mult = MatMult_SeqBAIJ_7; 2288 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2289 break; 2290 default: 2291 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2292 B->ops->mult = MatMult_SeqBAIJ_N; 2293 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2294 break; 2295 } 2296 } 2297 B->bs = bs; 2298 b->mbs = mbs; 2299 b->nbs = nbs; 2300 if (!skipallocation) { 2301 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2302 /* b->ilen will count nonzeros in each block row so far. */ 2303 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2304 if (!nnz) { 2305 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2306 else if (nz <= 0) nz = 1; 2307 for (i=0; i<mbs; i++) b->imax[i] = nz; 2308 nz = nz*mbs; 2309 } else { 2310 nz = 0; 2311 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2312 } 2313 2314 /* allocate the matrix space */ 2315 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 2316 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2317 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2318 b->singlemalloc = PETSC_TRUE; 2319 2320 b->i[0] = 0; 2321 for (i=1; i<mbs+1; i++) { 2322 b->i[i] = b->i[i-1] + b->imax[i-1]; 2323 } 2324 } 2325 2326 B->bs = bs; 2327 b->bs2 = bs2; 2328 b->mbs = mbs; 2329 b->nz = 0; 2330 b->maxnz = nz*bs2; 2331 B->info.nz_unneeded = (PetscReal)b->maxnz; 2332 PetscFunctionReturn(0); 2333 } 2334 EXTERN_C_END 2335 2336 /*MC 2337 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2338 block sparse compressed row format. 2339 2340 Options Database Keys: 2341 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2342 2343 Level: beginner 2344 2345 .Seealso: MatCreateSeqBAIJ 2346 M*/ 2347 2348 EXTERN_C_BEGIN 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatCreate_SeqBAIJ" 2351 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2352 { 2353 PetscErrorCode ierr; 2354 PetscMPIInt size; 2355 Mat_SeqBAIJ *b; 2356 2357 PetscFunctionBegin; 2358 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2359 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2360 2361 B->m = B->M = PetscMax(B->m,B->M); 2362 B->n = B->N = PetscMax(B->n,B->N); 2363 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2364 B->data = (void*)b; 2365 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2366 B->factor = 0; 2367 B->mapping = 0; 2368 b->row = 0; 2369 b->col = 0; 2370 b->icol = 0; 2371 b->reallocs = 0; 2372 b->saved_values = 0; 2373 #if defined(PETSC_USE_MAT_SINGLE) 2374 b->setvalueslen = 0; 2375 b->setvaluescopy = PETSC_NULL; 2376 #endif 2377 2378 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2379 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2380 2381 b->sorted = PETSC_FALSE; 2382 b->roworiented = PETSC_TRUE; 2383 b->nonew = 0; 2384 b->diag = 0; 2385 b->solve_work = 0; 2386 b->mult_work = 0; 2387 B->spptr = 0; 2388 B->info.nz_unneeded = (PetscReal)b->maxnz; 2389 b->keepzeroedrows = PETSC_FALSE; 2390 b->xtoy = 0; 2391 b->XtoY = 0; 2392 b->compressedrow.use = PETSC_FALSE; 2393 b->compressedrow.nrows = 0; 2394 b->compressedrow.i = PETSC_NULL; 2395 b->compressedrow.rindex = PETSC_NULL; 2396 b->compressedrow.checked = PETSC_FALSE; 2397 B->same_nonzero = PETSC_FALSE; 2398 2399 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2400 "MatInvertBlockDiagonal_SeqBAIJ", 2401 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2402 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2403 "MatStoreValues_SeqBAIJ", 2404 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2405 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2406 "MatRetrieveValues_SeqBAIJ", 2407 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2408 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2409 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2410 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2412 "MatConvert_SeqBAIJ_SeqAIJ", 2413 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2414 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2415 "MatConvert_SeqBAIJ_SeqSBAIJ", 2416 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2417 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2418 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2419 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 EXTERN_C_END 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2426 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2427 { 2428 Mat C; 2429 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2430 PetscErrorCode ierr; 2431 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2432 2433 PetscFunctionBegin; 2434 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2435 2436 *B = 0; 2437 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2438 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 2439 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2440 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2441 c = (Mat_SeqBAIJ*)C->data; 2442 2443 C->M = A->M; 2444 C->N = A->N; 2445 C->bs = A->bs; 2446 c->bs2 = a->bs2; 2447 c->mbs = a->mbs; 2448 c->nbs = a->nbs; 2449 2450 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2451 for (i=0; i<mbs; i++) { 2452 c->imax[i] = a->imax[i]; 2453 c->ilen[i] = a->ilen[i]; 2454 } 2455 2456 /* allocate the matrix space */ 2457 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2458 c->singlemalloc = PETSC_TRUE; 2459 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2460 if (mbs > 0) { 2461 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2462 if (cpvalues == MAT_COPY_VALUES) { 2463 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2464 } else { 2465 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2466 } 2467 } 2468 c->sorted = a->sorted; 2469 c->roworiented = a->roworiented; 2470 c->nonew = a->nonew; 2471 2472 if (a->diag) { 2473 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2474 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2475 for (i=0; i<mbs; i++) { 2476 c->diag[i] = a->diag[i]; 2477 } 2478 } else c->diag = 0; 2479 c->nz = a->nz; 2480 c->maxnz = a->maxnz; 2481 c->solve_work = 0; 2482 c->mult_work = 0; 2483 C->preallocated = PETSC_TRUE; 2484 C->assembled = PETSC_TRUE; 2485 2486 c->compressedrow.use = a->compressedrow.use; 2487 c->compressedrow.nrows = a->compressedrow.nrows; 2488 c->compressedrow.checked = a->compressedrow.checked; 2489 if ( a->compressedrow.checked && a->compressedrow.use){ 2490 i = a->compressedrow.nrows; 2491 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2492 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2493 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2494 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2495 } else { 2496 c->compressedrow.use = PETSC_FALSE; 2497 c->compressedrow.i = PETSC_NULL; 2498 c->compressedrow.rindex = PETSC_NULL; 2499 } 2500 C->same_nonzero = A->same_nonzero; 2501 *B = C; 2502 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatLoad_SeqBAIJ" 2508 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2509 { 2510 Mat_SeqBAIJ *a; 2511 Mat B; 2512 PetscErrorCode ierr; 2513 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2514 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2515 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2516 PetscInt *masked,nmask,tmp,bs2,ishift; 2517 PetscMPIInt size; 2518 int fd; 2519 PetscScalar *aa; 2520 MPI_Comm comm = ((PetscObject)viewer)->comm; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2524 bs2 = bs*bs; 2525 2526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2527 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2528 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2529 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2530 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2531 M = header[1]; N = header[2]; nz = header[3]; 2532 2533 if (header[3] < 0) { 2534 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2535 } 2536 2537 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2538 2539 /* 2540 This code adds extra rows to make sure the number of rows is 2541 divisible by the blocksize 2542 */ 2543 mbs = M/bs; 2544 extra_rows = bs - M + bs*(mbs); 2545 if (extra_rows == bs) extra_rows = 0; 2546 else mbs++; 2547 if (extra_rows) { 2548 ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2549 } 2550 2551 /* read in row lengths */ 2552 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2553 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2554 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2555 2556 /* read in column indices */ 2557 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2558 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2559 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2560 2561 /* loop over row lengths determining block row lengths */ 2562 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2563 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2564 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2565 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2566 masked = mask + mbs; 2567 rowcount = 0; nzcount = 0; 2568 for (i=0; i<mbs; i++) { 2569 nmask = 0; 2570 for (j=0; j<bs; j++) { 2571 kmax = rowlengths[rowcount]; 2572 for (k=0; k<kmax; k++) { 2573 tmp = jj[nzcount++]/bs; 2574 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2575 } 2576 rowcount++; 2577 } 2578 browlengths[i] += nmask; 2579 /* zero out the mask elements we set */ 2580 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2581 } 2582 2583 /* create our matrix */ 2584 ierr = MatCreate(comm,&B); 2585 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2586 ierr = MatSetType(B,type);CHKERRQ(ierr); 2587 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2588 a = (Mat_SeqBAIJ*)B->data; 2589 2590 /* set matrix "i" values */ 2591 a->i[0] = 0; 2592 for (i=1; i<= mbs; i++) { 2593 a->i[i] = a->i[i-1] + browlengths[i-1]; 2594 a->ilen[i-1] = browlengths[i-1]; 2595 } 2596 a->nz = 0; 2597 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2598 2599 /* read in nonzero values */ 2600 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2601 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2602 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2603 2604 /* set "a" and "j" values into matrix */ 2605 nzcount = 0; jcount = 0; 2606 for (i=0; i<mbs; i++) { 2607 nzcountb = nzcount; 2608 nmask = 0; 2609 for (j=0; j<bs; j++) { 2610 kmax = rowlengths[i*bs+j]; 2611 for (k=0; k<kmax; k++) { 2612 tmp = jj[nzcount++]/bs; 2613 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2614 } 2615 } 2616 /* sort the masked values */ 2617 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2618 2619 /* set "j" values into matrix */ 2620 maskcount = 1; 2621 for (j=0; j<nmask; j++) { 2622 a->j[jcount++] = masked[j]; 2623 mask[masked[j]] = maskcount++; 2624 } 2625 /* set "a" values into matrix */ 2626 ishift = bs2*a->i[i]; 2627 for (j=0; j<bs; j++) { 2628 kmax = rowlengths[i*bs+j]; 2629 for (k=0; k<kmax; k++) { 2630 tmp = jj[nzcountb]/bs ; 2631 block = mask[tmp] - 1; 2632 point = jj[nzcountb] - bs*tmp; 2633 idx = ishift + bs2*block + j + bs*point; 2634 a->a[idx] = (MatScalar)aa[nzcountb++]; 2635 } 2636 } 2637 /* zero out the mask elements we set */ 2638 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2639 } 2640 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2641 2642 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2643 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2644 ierr = PetscFree(aa);CHKERRQ(ierr); 2645 ierr = PetscFree(jj);CHKERRQ(ierr); 2646 ierr = PetscFree(mask);CHKERRQ(ierr); 2647 2648 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatView_Private(B);CHKERRQ(ierr); 2651 2652 *A = B; 2653 PetscFunctionReturn(0); 2654 } 2655 2656 #undef __FUNCT__ 2657 #define __FUNCT__ "MatCreateSeqBAIJ" 2658 /*@C 2659 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2660 compressed row) format. For good matrix assembly performance the 2661 user should preallocate the matrix storage by setting the parameter nz 2662 (or the array nnz). By setting these parameters accurately, performance 2663 during matrix assembly can be increased by more than a factor of 50. 2664 2665 Collective on MPI_Comm 2666 2667 Input Parameters: 2668 + comm - MPI communicator, set to PETSC_COMM_SELF 2669 . bs - size of block 2670 . m - number of rows 2671 . n - number of columns 2672 . nz - number of nonzero blocks per block row (same for all rows) 2673 - nnz - array containing the number of nonzero blocks in the various block rows 2674 (possibly different for each block row) or PETSC_NULL 2675 2676 Output Parameter: 2677 . A - the matrix 2678 2679 Options Database Keys: 2680 . -mat_no_unroll - uses code that does not unroll the loops in the 2681 block calculations (much slower) 2682 . -mat_block_size - size of the blocks to use 2683 2684 Level: intermediate 2685 2686 Notes: 2687 The number of rows and columns must be divisible by blocksize. 2688 2689 If the nnz parameter is given then the nz parameter is ignored 2690 2691 A nonzero block is any block that as 1 or more nonzeros in it 2692 2693 The block AIJ format is fully compatible with standard Fortran 77 2694 storage. That is, the stored row and column indices can begin at 2695 either one (as in Fortran) or zero. See the users' manual for details. 2696 2697 Specify the preallocated storage with either nz or nnz (not both). 2698 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2699 allocation. For additional details, see the users manual chapter on 2700 matrices. 2701 2702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2703 @*/ 2704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2705 { 2706 PetscErrorCode ierr; 2707 2708 PetscFunctionBegin; 2709 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2710 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2711 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2712 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2718 /*@C 2719 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2720 per row in the matrix. For good matrix assembly performance the 2721 user should preallocate the matrix storage by setting the parameter nz 2722 (or the array nnz). By setting these parameters accurately, performance 2723 during matrix assembly can be increased by more than a factor of 50. 2724 2725 Collective on MPI_Comm 2726 2727 Input Parameters: 2728 + A - the matrix 2729 . bs - size of block 2730 . nz - number of block nonzeros per block row (same for all rows) 2731 - nnz - array containing the number of block nonzeros in the various block rows 2732 (possibly different for each block row) or PETSC_NULL 2733 2734 Options Database Keys: 2735 . -mat_no_unroll - uses code that does not unroll the loops in the 2736 block calculations (much slower) 2737 . -mat_block_size - size of the blocks to use 2738 2739 Level: intermediate 2740 2741 Notes: 2742 If the nnz parameter is given then the nz parameter is ignored 2743 2744 The block AIJ format is fully compatible with standard Fortran 77 2745 storage. That is, the stored row and column indices can begin at 2746 either one (as in Fortran) or zero. See the users' manual for details. 2747 2748 Specify the preallocated storage with either nz or nnz (not both). 2749 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2750 allocation. For additional details, see the users manual chapter on 2751 matrices. 2752 2753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2754 @*/ 2755 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2756 { 2757 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2758 2759 PetscFunctionBegin; 2760 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2761 if (f) { 2762 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2763 } 2764 PetscFunctionReturn(0); 2765 } 2766 2767