1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for Fun3d sequential benchmark 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 stepval = (n-1)*4; 590 for (k=0; k<m; k++) { /* loop over added rows */ 591 row = im[k]; 592 rp = aj + ai[row]; 593 ap = aa + 16*ai[row]; 594 nrow = ailen[row]; 595 low = 0; 596 for (l=0; l<n; l++) { /* loop over added columns */ 597 col = in[l]; 598 value = v + k*(stepval+4)*4 + l*4; 599 low = 0; high = nrow; 600 while (high-low > 7) { 601 t = (low+high)/2; 602 if (rp[t] > col) high = t; 603 else low = t; 604 } 605 for (i=low; i<high; i++) { 606 if (rp[i] > col) break; 607 if (rp[i] == col) { 608 bap = ap + 16*i; 609 for (ii=0; ii<4; ii++,value+=stepval) { 610 for (jj=ii; jj<16; jj+=4) { 611 bap[jj] += *value++; 612 } 613 } 614 goto noinsert2; 615 } 616 } 617 N = nrow++ - 1; 618 /* shift up all the later entries in this row */ 619 for (ii=N; ii>=i; ii--) { 620 rp[ii+1] = rp[ii]; 621 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 622 } 623 if (N >= i) { 624 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 625 } 626 rp[i] = col; 627 bap = ap + 16*i; 628 for (ii=0; ii<4; ii++,value+=stepval) { 629 for (jj=ii; jj<16; jj+=4) { 630 bap[jj] = *value++; 631 } 632 } 633 noinsert2:; 634 low = i; 635 } 636 ailen[row] = nrow; 637 } 638 PetscFunctionReturnVoid(); 639 } 640 EXTERN_C_END 641 642 #if defined(PETSC_HAVE_FORTRAN_CAPS) 643 #define matsetvalues4_ MATSETVALUES4 644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 645 #define matsetvalues4_ matsetvalues4 646 #endif 647 648 EXTERN_C_BEGIN 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSetValues4_" 651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 652 { 653 Mat A = *AA; 654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 655 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 656 PetscInt *ai=a->i,*ailen=a->ilen; 657 PetscInt *aj=a->j,brow,bcol; 658 PetscInt ridx,cidx; 659 MatScalar *ap,value,*aa=a->a,*bap; 660 661 PetscFunctionBegin; 662 for (k=0; k<m; k++) { /* loop over added rows */ 663 row = im[k]; brow = row/4; 664 rp = aj + ai[brow]; 665 ap = aa + 16*ai[brow]; 666 nrow = ailen[brow]; 667 low = 0; 668 for (l=0; l<n; l++) { /* loop over added columns */ 669 col = in[l]; bcol = col/4; 670 ridx = row % 4; cidx = col % 4; 671 value = v[l + k*n]; 672 low = 0; high = nrow; 673 while (high-low > 7) { 674 t = (low+high)/2; 675 if (rp[t] > bcol) high = t; 676 else low = t; 677 } 678 for (i=low; i<high; i++) { 679 if (rp[i] > bcol) break; 680 if (rp[i] == bcol) { 681 bap = ap + 16*i + 4*cidx + ridx; 682 *bap += value; 683 goto noinsert1; 684 } 685 } 686 N = nrow++ - 1; 687 /* shift up all the later entries in this row */ 688 for (ii=N; ii>=i; ii--) { 689 rp[ii+1] = rp[ii]; 690 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 691 } 692 if (N>=i) { 693 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 694 } 695 rp[i] = bcol; 696 ap[16*i + 4*cidx + ridx] = value; 697 noinsert1:; 698 low = i; 699 } 700 ailen[brow] = nrow; 701 } 702 PetscFunctionReturnVoid(); 703 } 704 EXTERN_C_END 705 706 /* UGLY, ugly, ugly 707 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 708 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 709 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 710 converts the entries into single precision and then calls ..._MatScalar() to put them 711 into the single precision data structures. 712 */ 713 #if defined(PETSC_USE_MAT_SINGLE) 714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 715 #else 716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 717 #endif 718 719 #define CHUNKSIZE 10 720 721 /* 722 Checks for missing diagonals 723 */ 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 727 { 728 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 729 PetscErrorCode ierr; 730 PetscInt *diag,*jj = a->j,i; 731 732 PetscFunctionBegin; 733 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 734 diag = a->diag; 735 for (i=0; i<a->mbs; i++) { 736 if (jj[diag[i]] != i) { 737 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 738 } 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 746 { 747 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscInt i,j,*diag,m = a->mbs; 750 751 PetscFunctionBegin; 752 if (a->diag) PetscFunctionReturn(0); 753 754 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 755 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 756 for (i=0; i<m; i++) { 757 diag[i] = a->i[i+1]; 758 for (j=a->i[i]; j<a->i[i+1]; j++) { 759 if (a->j[j] == i) { 760 diag[i] = j; 761 break; 762 } 763 } 764 } 765 a->diag = diag; 766 PetscFunctionReturn(0); 767 } 768 769 770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 775 { 776 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 777 PetscErrorCode ierr; 778 PetscInt n = a->mbs,i; 779 780 PetscFunctionBegin; 781 *nn = n; 782 if (!ia) PetscFunctionReturn(0); 783 if (symmetric) { 784 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 785 } else if (oshift == 1) { 786 /* temporarily add 1 to i and j indices */ 787 PetscInt nz = a->i[n]; 788 for (i=0; i<nz; i++) a->j[i]++; 789 for (i=0; i<n+1; i++) a->i[i]++; 790 *ia = a->i; *ja = a->j; 791 } else { 792 *ia = a->i; *ja = a->j; 793 } 794 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 801 { 802 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 803 PetscErrorCode ierr; 804 PetscInt i,n = a->mbs; 805 806 PetscFunctionBegin; 807 if (!ia) PetscFunctionReturn(0); 808 if (symmetric) { 809 ierr = PetscFree(*ia);CHKERRQ(ierr); 810 ierr = PetscFree(*ja);CHKERRQ(ierr); 811 } else if (oshift == 1) { 812 PetscInt nz = a->i[n]-1; 813 for (i=0; i<nz; i++) a->j[i]--; 814 for (i=0; i<n+1; i++) a->i[i]--; 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatDestroy_SeqBAIJ" 821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 822 { 823 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 #if defined(PETSC_USE_LOG) 828 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 829 #endif 830 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 831 if (a->row) { 832 ierr = ISDestroy(a->row);CHKERRQ(ierr); 833 } 834 if (a->col) { 835 ierr = ISDestroy(a->col);CHKERRQ(ierr); 836 } 837 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 838 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 839 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 840 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 841 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 842 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 843 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 844 #if defined(PETSC_USE_MAT_SINGLE) 845 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 846 #endif 847 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 848 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 849 850 ierr = PetscFree(a);CHKERRQ(ierr); 851 852 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 853 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 854 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 856 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 857 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatSetOption_SeqBAIJ" 864 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 865 { 866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 867 PetscErrorCode ierr; 868 869 PetscFunctionBegin; 870 switch (op) { 871 case MAT_ROW_ORIENTED: 872 a->roworiented = PETSC_TRUE; 873 break; 874 case MAT_COLUMN_ORIENTED: 875 a->roworiented = PETSC_FALSE; 876 break; 877 case MAT_COLUMNS_SORTED: 878 a->sorted = PETSC_TRUE; 879 break; 880 case MAT_COLUMNS_UNSORTED: 881 a->sorted = PETSC_FALSE; 882 break; 883 case MAT_KEEP_ZEROED_ROWS: 884 a->keepzeroedrows = PETSC_TRUE; 885 break; 886 case MAT_NO_NEW_NONZERO_LOCATIONS: 887 a->nonew = 1; 888 break; 889 case MAT_NEW_NONZERO_LOCATION_ERR: 890 a->nonew = -1; 891 break; 892 case MAT_NEW_NONZERO_ALLOCATION_ERR: 893 a->nonew = -2; 894 break; 895 case MAT_YES_NEW_NONZERO_LOCATIONS: 896 a->nonew = 0; 897 break; 898 case MAT_ROWS_SORTED: 899 case MAT_ROWS_UNSORTED: 900 case MAT_YES_NEW_DIAGONALS: 901 case MAT_IGNORE_OFF_PROC_ENTRIES: 902 case MAT_USE_HASH_TABLE: 903 ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 904 break; 905 case MAT_NO_NEW_DIAGONALS: 906 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 907 case MAT_SYMMETRIC: 908 case MAT_STRUCTURALLY_SYMMETRIC: 909 case MAT_NOT_SYMMETRIC: 910 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 911 case MAT_HERMITIAN: 912 case MAT_NOT_HERMITIAN: 913 case MAT_SYMMETRY_ETERNAL: 914 case MAT_NOT_SYMMETRY_ETERNAL: 915 break; 916 default: 917 SETERRQ(PETSC_ERR_SUP,"unknown option"); 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatGetRow_SeqBAIJ" 924 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 925 { 926 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 927 PetscErrorCode ierr; 928 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 929 MatScalar *aa,*aa_i; 930 PetscScalar *v_i; 931 932 PetscFunctionBegin; 933 bs = A->bs; 934 ai = a->i; 935 aj = a->j; 936 aa = a->a; 937 bs2 = a->bs2; 938 939 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 940 941 bn = row/bs; /* Block number */ 942 bp = row % bs; /* Block Position */ 943 M = ai[bn+1] - ai[bn]; 944 *nz = bs*M; 945 946 if (v) { 947 *v = 0; 948 if (*nz) { 949 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 950 for (i=0; i<M; i++) { /* for each block in the block row */ 951 v_i = *v + i*bs; 952 aa_i = aa + bs2*(ai[bn] + i); 953 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 954 } 955 } 956 } 957 958 if (idx) { 959 *idx = 0; 960 if (*nz) { 961 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 962 for (i=0; i<M; i++) { /* for each block in the block row */ 963 idx_i = *idx + i*bs; 964 itmp = bs*aj[ai[bn] + i]; 965 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 966 } 967 } 968 } 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 974 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 975 { 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 980 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatTranspose_SeqBAIJ" 986 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 987 { 988 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 989 Mat C; 990 PetscErrorCode ierr; 991 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 992 PetscInt *rows,*cols,bs2=a->bs2; 993 PetscScalar *array; 994 995 PetscFunctionBegin; 996 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 997 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 998 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 999 1000 #if defined(PETSC_USE_MAT_SINGLE) 1001 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1002 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1003 #else 1004 array = a->a; 1005 #endif 1006 1007 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1008 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1009 ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr); 1010 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1011 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1012 ierr = PetscFree(col);CHKERRQ(ierr); 1013 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1014 cols = rows + bs; 1015 for (i=0; i<mbs; i++) { 1016 cols[0] = i*bs; 1017 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1018 len = ai[i+1] - ai[i]; 1019 for (j=0; j<len; j++) { 1020 rows[0] = (*aj++)*bs; 1021 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1022 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1023 array += bs2; 1024 } 1025 } 1026 ierr = PetscFree(rows);CHKERRQ(ierr); 1027 #if defined(PETSC_USE_MAT_SINGLE) 1028 ierr = PetscFree(array); 1029 #endif 1030 1031 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1032 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1033 1034 if (B) { 1035 *B = C; 1036 } else { 1037 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1038 } 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1044 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1045 { 1046 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1047 PetscErrorCode ierr; 1048 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1049 int fd; 1050 PetscScalar *aa; 1051 FILE *file; 1052 1053 PetscFunctionBegin; 1054 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1055 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1056 col_lens[0] = MAT_FILE_COOKIE; 1057 1058 col_lens[1] = A->m; 1059 col_lens[2] = A->n; 1060 col_lens[3] = a->nz*bs2; 1061 1062 /* store lengths of each row and write (including header) to file */ 1063 count = 0; 1064 for (i=0; i<a->mbs; i++) { 1065 for (j=0; j<bs; j++) { 1066 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1067 } 1068 } 1069 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1070 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1071 1072 /* store column indices (zero start index) */ 1073 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1074 count = 0; 1075 for (i=0; i<a->mbs; i++) { 1076 for (j=0; j<bs; j++) { 1077 for (k=a->i[i]; k<a->i[i+1]; k++) { 1078 for (l=0; l<bs; l++) { 1079 jj[count++] = bs*a->j[k] + l; 1080 } 1081 } 1082 } 1083 } 1084 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1085 ierr = PetscFree(jj);CHKERRQ(ierr); 1086 1087 /* store nonzero values */ 1088 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1089 count = 0; 1090 for (i=0; i<a->mbs; i++) { 1091 for (j=0; j<bs; j++) { 1092 for (k=a->i[i]; k<a->i[i+1]; k++) { 1093 for (l=0; l<bs; l++) { 1094 aa[count++] = a->a[bs2*k + l*bs + j]; 1095 } 1096 } 1097 } 1098 } 1099 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1100 ierr = PetscFree(aa);CHKERRQ(ierr); 1101 1102 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1103 if (file) { 1104 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1111 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1112 { 1113 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1114 PetscErrorCode ierr; 1115 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1116 PetscViewerFormat format; 1117 1118 PetscFunctionBegin; 1119 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1120 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1121 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1122 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1123 Mat aij; 1124 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1125 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1126 ierr = MatDestroy(aij);CHKERRQ(ierr); 1127 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1128 PetscFunctionReturn(0); 1129 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1130 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1131 for (i=0; i<a->mbs; i++) { 1132 for (j=0; j<bs; j++) { 1133 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1134 for (k=a->i[i]; k<a->i[i+1]; k++) { 1135 for (l=0; l<bs; l++) { 1136 #if defined(PETSC_USE_COMPLEX) 1137 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1138 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1139 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1140 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1141 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1142 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1143 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1144 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1145 } 1146 #else 1147 if (a->a[bs2*k + l*bs + j] != 0.0) { 1148 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1149 } 1150 #endif 1151 } 1152 } 1153 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1154 } 1155 } 1156 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1157 } else { 1158 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1159 for (i=0; i<a->mbs; i++) { 1160 for (j=0; j<bs; j++) { 1161 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1162 for (k=a->i[i]; k<a->i[i+1]; k++) { 1163 for (l=0; l<bs; l++) { 1164 #if defined(PETSC_USE_COMPLEX) 1165 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1166 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1167 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1168 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1169 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1170 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1171 } else { 1172 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1173 } 1174 #else 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1176 #endif 1177 } 1178 } 1179 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1180 } 1181 } 1182 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1183 } 1184 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1190 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1191 { 1192 Mat A = (Mat) Aa; 1193 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1194 PetscErrorCode ierr; 1195 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1196 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1197 MatScalar *aa; 1198 PetscViewer viewer; 1199 1200 PetscFunctionBegin; 1201 1202 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1203 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1204 1205 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1206 1207 /* loop over matrix elements drawing boxes */ 1208 color = PETSC_DRAW_BLUE; 1209 for (i=0,row=0; i<mbs; i++,row+=bs) { 1210 for (j=a->i[i]; j<a->i[i+1]; j++) { 1211 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1212 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1213 aa = a->a + j*bs2; 1214 for (k=0; k<bs; k++) { 1215 for (l=0; l<bs; l++) { 1216 if (PetscRealPart(*aa++) >= 0.) continue; 1217 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1218 } 1219 } 1220 } 1221 } 1222 color = PETSC_DRAW_CYAN; 1223 for (i=0,row=0; i<mbs; i++,row+=bs) { 1224 for (j=a->i[i]; j<a->i[i+1]; j++) { 1225 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1226 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1227 aa = a->a + j*bs2; 1228 for (k=0; k<bs; k++) { 1229 for (l=0; l<bs; l++) { 1230 if (PetscRealPart(*aa++) != 0.) continue; 1231 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1232 } 1233 } 1234 } 1235 } 1236 1237 color = PETSC_DRAW_RED; 1238 for (i=0,row=0; i<mbs; i++,row+=bs) { 1239 for (j=a->i[i]; j<a->i[i+1]; j++) { 1240 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1241 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1242 aa = a->a + j*bs2; 1243 for (k=0; k<bs; k++) { 1244 for (l=0; l<bs; l++) { 1245 if (PetscRealPart(*aa++) <= 0.) continue; 1246 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1247 } 1248 } 1249 } 1250 } 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1256 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1257 { 1258 PetscErrorCode ierr; 1259 PetscReal xl,yl,xr,yr,w,h; 1260 PetscDraw draw; 1261 PetscTruth isnull; 1262 1263 PetscFunctionBegin; 1264 1265 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1266 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1267 1268 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1269 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1270 xr += w; yr += h; xl = -w; yl = -h; 1271 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1272 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1273 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatView_SeqBAIJ" 1279 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1280 { 1281 PetscErrorCode ierr; 1282 PetscTruth iascii,isbinary,isdraw; 1283 1284 PetscFunctionBegin; 1285 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1286 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1287 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1288 if (iascii){ 1289 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1290 } else if (isbinary) { 1291 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1292 } else if (isdraw) { 1293 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1294 } else { 1295 Mat B; 1296 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1297 ierr = MatView(B,viewer);CHKERRQ(ierr); 1298 ierr = MatDestroy(B);CHKERRQ(ierr); 1299 } 1300 PetscFunctionReturn(0); 1301 } 1302 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1306 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1307 { 1308 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1309 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1310 PetscInt *ai = a->i,*ailen = a->ilen; 1311 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1312 MatScalar *ap,*aa = a->a,zero = 0.0; 1313 1314 PetscFunctionBegin; 1315 for (k=0; k<m; k++) { /* loop over rows */ 1316 row = im[k]; brow = row/bs; 1317 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1318 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1319 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1320 nrow = ailen[brow]; 1321 for (l=0; l<n; l++) { /* loop over columns */ 1322 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1323 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1324 col = in[l] ; 1325 bcol = col/bs; 1326 cidx = col%bs; 1327 ridx = row%bs; 1328 high = nrow; 1329 low = 0; /* assume unsorted */ 1330 while (high-low > 5) { 1331 t = (low+high)/2; 1332 if (rp[t] > bcol) high = t; 1333 else low = t; 1334 } 1335 for (i=low; i<high; i++) { 1336 if (rp[i] > bcol) break; 1337 if (rp[i] == bcol) { 1338 *v++ = ap[bs2*i+bs*cidx+ridx]; 1339 goto finished; 1340 } 1341 } 1342 *v++ = zero; 1343 finished:; 1344 } 1345 } 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #if defined(PETSC_USE_MAT_SINGLE) 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1352 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1353 { 1354 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1355 PetscErrorCode ierr; 1356 PetscInt i,N = m*n*b->bs2; 1357 MatScalar *vsingle; 1358 1359 PetscFunctionBegin; 1360 if (N > b->setvalueslen) { 1361 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1362 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1363 b->setvalueslen = N; 1364 } 1365 vsingle = b->setvaluescopy; 1366 for (i=0; i<N; i++) { 1367 vsingle[i] = v[i]; 1368 } 1369 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 #endif 1373 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1377 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1378 { 1379 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1380 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1381 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1382 PetscErrorCode ierr; 1383 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1384 PetscTruth roworiented=a->roworiented; 1385 const MatScalar *value = v; 1386 MatScalar *ap,*aa = a->a,*bap; 1387 1388 PetscFunctionBegin; 1389 if (roworiented) { 1390 stepval = (n-1)*bs; 1391 } else { 1392 stepval = (m-1)*bs; 1393 } 1394 for (k=0; k<m; k++) { /* loop over added rows */ 1395 row = im[k]; 1396 if (row < 0) continue; 1397 #if defined(PETSC_USE_DEBUG) 1398 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1399 #endif 1400 rp = aj + ai[row]; 1401 ap = aa + bs2*ai[row]; 1402 rmax = imax[row]; 1403 nrow = ailen[row]; 1404 low = 0; 1405 high = nrow; 1406 for (l=0; l<n; l++) { /* loop over added columns */ 1407 if (in[l] < 0) continue; 1408 #if defined(PETSC_USE_DEBUG) 1409 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1410 #endif 1411 col = in[l]; 1412 if (roworiented) { 1413 value = v + k*(stepval+bs)*bs + l*bs; 1414 } else { 1415 value = v + l*(stepval+bs)*bs + k*bs; 1416 } 1417 if (col <= lastcol) low = 0; else high = nrow; 1418 lastcol = col; 1419 while (high-low > 7) { 1420 t = (low+high)/2; 1421 if (rp[t] > col) high = t; 1422 else low = t; 1423 } 1424 for (i=low; i<high; i++) { 1425 if (rp[i] > col) break; 1426 if (rp[i] == col) { 1427 bap = ap + bs2*i; 1428 if (roworiented) { 1429 if (is == ADD_VALUES) { 1430 for (ii=0; ii<bs; ii++,value+=stepval) { 1431 for (jj=ii; jj<bs2; jj+=bs) { 1432 bap[jj] += *value++; 1433 } 1434 } 1435 } else { 1436 for (ii=0; ii<bs; ii++,value+=stepval) { 1437 for (jj=ii; jj<bs2; jj+=bs) { 1438 bap[jj] = *value++; 1439 } 1440 } 1441 } 1442 } else { 1443 if (is == ADD_VALUES) { 1444 for (ii=0; ii<bs; ii++,value+=stepval) { 1445 for (jj=0; jj<bs; jj++) { 1446 *bap++ += *value++; 1447 } 1448 } 1449 } else { 1450 for (ii=0; ii<bs; ii++,value+=stepval) { 1451 for (jj=0; jj<bs; jj++) { 1452 *bap++ = *value++; 1453 } 1454 } 1455 } 1456 } 1457 goto noinsert2; 1458 } 1459 } 1460 if (nonew == 1) goto noinsert2; 1461 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1462 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1463 N = nrow++ - 1; 1464 /* shift up all the later entries in this row */ 1465 for (ii=N; ii>=i; ii--) { 1466 rp[ii+1] = rp[ii]; 1467 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1468 } 1469 if (N >= i) { 1470 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1471 } 1472 rp[i] = col; 1473 bap = ap + bs2*i; 1474 if (roworiented) { 1475 for (ii=0; ii<bs; ii++,value+=stepval) { 1476 for (jj=ii; jj<bs2; jj+=bs) { 1477 bap[jj] = *value++; 1478 } 1479 } 1480 } else { 1481 for (ii=0; ii<bs; ii++,value+=stepval) { 1482 for (jj=0; jj<bs; jj++) { 1483 *bap++ = *value++; 1484 } 1485 } 1486 } 1487 noinsert2:; 1488 low = i; 1489 } 1490 ailen[row] = nrow; 1491 } 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1497 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1498 { 1499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1500 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1501 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1502 PetscErrorCode ierr; 1503 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1504 MatScalar *aa = a->a,*ap; 1505 PetscReal ratio=0.6; 1506 1507 PetscFunctionBegin; 1508 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1509 1510 if (m) rmax = ailen[0]; 1511 for (i=1; i<mbs; i++) { 1512 /* move each row back by the amount of empty slots (fshift) before it*/ 1513 fshift += imax[i-1] - ailen[i-1]; 1514 rmax = PetscMax(rmax,ailen[i]); 1515 if (fshift) { 1516 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1517 N = ailen[i]; 1518 for (j=0; j<N; j++) { 1519 ip[j-fshift] = ip[j]; 1520 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1521 } 1522 } 1523 ai[i] = ai[i-1] + ailen[i-1]; 1524 } 1525 if (mbs) { 1526 fshift += imax[mbs-1] - ailen[mbs-1]; 1527 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1528 } 1529 /* reset ilen and imax for each row */ 1530 for (i=0; i<mbs; i++) { 1531 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1532 } 1533 a->nz = ai[mbs]; 1534 1535 /* diagonals may have moved, so kill the diagonal pointers */ 1536 a->idiagvalid = PETSC_FALSE; 1537 if (fshift && a->diag) { 1538 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1539 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1540 a->diag = 0; 1541 } 1542 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1543 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1544 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1545 a->reallocs = 0; 1546 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1547 1548 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1549 if (a->compressedrow.use){ 1550 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1551 } 1552 1553 A->same_nonzero = PETSC_TRUE; 1554 PetscFunctionReturn(0); 1555 } 1556 1557 /* 1558 This function returns an array of flags which indicate the locations of contiguous 1559 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1560 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1561 Assume: sizes should be long enough to hold all the values. 1562 */ 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1565 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1566 { 1567 PetscInt i,j,k,row; 1568 PetscTruth flg; 1569 1570 PetscFunctionBegin; 1571 for (i=0,j=0; i<n; j++) { 1572 row = idx[i]; 1573 if (row%bs!=0) { /* Not the begining of a block */ 1574 sizes[j] = 1; 1575 i++; 1576 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1577 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1578 i++; 1579 } else { /* Begining of the block, so check if the complete block exists */ 1580 flg = PETSC_TRUE; 1581 for (k=1; k<bs; k++) { 1582 if (row+k != idx[i+k]) { /* break in the block */ 1583 flg = PETSC_FALSE; 1584 break; 1585 } 1586 } 1587 if (flg) { /* No break in the bs */ 1588 sizes[j] = bs; 1589 i+= bs; 1590 } else { 1591 sizes[j] = 1; 1592 i++; 1593 } 1594 } 1595 } 1596 *bs_max = j; 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1602 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1603 { 1604 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1605 PetscErrorCode ierr; 1606 PetscInt i,j,k,count,*rows; 1607 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1608 PetscScalar zero = 0.0; 1609 MatScalar *aa; 1610 1611 PetscFunctionBegin; 1612 /* Make a copy of the IS and sort it */ 1613 /* allocate memory for rows,sizes */ 1614 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1615 sizes = rows + is_n; 1616 1617 /* copy IS values to rows, and sort them */ 1618 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1619 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1620 if (baij->keepzeroedrows) { 1621 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1622 bs_max = is_n; 1623 A->same_nonzero = PETSC_TRUE; 1624 } else { 1625 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1626 A->same_nonzero = PETSC_FALSE; 1627 } 1628 1629 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1630 row = rows[j]; 1631 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1632 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1633 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1634 if (sizes[i] == bs && !baij->keepzeroedrows) { 1635 if (diag != 0.0) { 1636 if (baij->ilen[row/bs] > 0) { 1637 baij->ilen[row/bs] = 1; 1638 baij->j[baij->i[row/bs]] = row/bs; 1639 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1640 } 1641 /* Now insert all the diagonal values for this bs */ 1642 for (k=0; k<bs; k++) { 1643 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1644 } 1645 } else { /* (diag == 0.0) */ 1646 baij->ilen[row/bs] = 0; 1647 } /* end (diag == 0.0) */ 1648 } else { /* (sizes[i] != bs) */ 1649 #if defined (PETSC_USE_DEBUG) 1650 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1651 #endif 1652 for (k=0; k<count; k++) { 1653 aa[0] = zero; 1654 aa += bs; 1655 } 1656 if (diag != 0.0) { 1657 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1658 } 1659 } 1660 } 1661 1662 ierr = PetscFree(rows);CHKERRQ(ierr); 1663 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1669 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1670 { 1671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1672 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1673 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1674 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1675 PetscErrorCode ierr; 1676 PetscInt ridx,cidx,bs2=a->bs2; 1677 PetscTruth roworiented=a->roworiented; 1678 MatScalar *ap,value,*aa=a->a,*bap; 1679 1680 PetscFunctionBegin; 1681 for (k=0; k<m; k++) { /* loop over added rows */ 1682 row = im[k]; 1683 brow = row/bs; 1684 if (row < 0) continue; 1685 #if defined(PETSC_USE_DEBUG) 1686 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1687 #endif 1688 rp = aj + ai[brow]; 1689 ap = aa + bs2*ai[brow]; 1690 rmax = imax[brow]; 1691 nrow = ailen[brow]; 1692 low = 0; 1693 high = nrow; 1694 for (l=0; l<n; l++) { /* loop over added columns */ 1695 if (in[l] < 0) continue; 1696 #if defined(PETSC_USE_DEBUG) 1697 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1698 #endif 1699 col = in[l]; bcol = col/bs; 1700 ridx = row % bs; cidx = col % bs; 1701 if (roworiented) { 1702 value = v[l + k*n]; 1703 } else { 1704 value = v[k + l*m]; 1705 } 1706 if (col <= lastcol) low = 0; else high = nrow; 1707 lastcol = col; 1708 while (high-low > 7) { 1709 t = (low+high)/2; 1710 if (rp[t] > bcol) high = t; 1711 else low = t; 1712 } 1713 for (i=low; i<high; i++) { 1714 if (rp[i] > bcol) break; 1715 if (rp[i] == bcol) { 1716 bap = ap + bs2*i + bs*cidx + ridx; 1717 if (is == ADD_VALUES) *bap += value; 1718 else *bap = value; 1719 goto noinsert1; 1720 } 1721 } 1722 if (nonew == 1) goto noinsert1; 1723 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1724 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1725 N = nrow++ - 1; 1726 /* shift up all the later entries in this row */ 1727 for (ii=N; ii>=i; ii--) { 1728 rp[ii+1] = rp[ii]; 1729 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1730 } 1731 if (N>=i) { 1732 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1733 } 1734 rp[i] = bcol; 1735 ap[bs2*i + bs*cidx + ridx] = value; 1736 a->nz++; 1737 noinsert1:; 1738 low = i; 1739 } 1740 ailen[brow] = nrow; 1741 } 1742 A->same_nonzero = PETSC_FALSE; 1743 PetscFunctionReturn(0); 1744 } 1745 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1749 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1750 { 1751 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1752 Mat outA; 1753 PetscErrorCode ierr; 1754 PetscTruth row_identity,col_identity; 1755 1756 PetscFunctionBegin; 1757 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1758 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1759 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1760 if (!row_identity || !col_identity) { 1761 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1762 } 1763 1764 outA = inA; 1765 inA->factor = FACTOR_LU; 1766 1767 if (!a->diag) { 1768 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1769 } 1770 1771 a->row = row; 1772 a->col = col; 1773 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1774 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1775 1776 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1777 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1778 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1779 1780 /* 1781 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1782 for ILU(0) factorization with natural ordering 1783 */ 1784 if (inA->bs < 8) { 1785 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1786 } else { 1787 if (!a->solve_work) { 1788 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1789 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1790 } 1791 } 1792 1793 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1794 1795 PetscFunctionReturn(0); 1796 } 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1799 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1800 { 1801 static PetscTruth called = PETSC_FALSE; 1802 MPI_Comm comm = A->comm; 1803 PetscErrorCode ierr; 1804 1805 PetscFunctionBegin; 1806 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1807 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1808 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 EXTERN_C_BEGIN 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1815 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1816 { 1817 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1818 PetscInt i,nz,nbs; 1819 1820 PetscFunctionBegin; 1821 nz = baij->maxnz/baij->bs2; 1822 nbs = baij->nbs; 1823 for (i=0; i<nz; i++) { 1824 baij->j[i] = indices[i]; 1825 } 1826 baij->nz = nz; 1827 for (i=0; i<nbs; i++) { 1828 baij->ilen[i] = baij->imax[i]; 1829 } 1830 1831 PetscFunctionReturn(0); 1832 } 1833 EXTERN_C_END 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1837 /*@ 1838 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1839 in the matrix. 1840 1841 Input Parameters: 1842 + mat - the SeqBAIJ matrix 1843 - indices - the column indices 1844 1845 Level: advanced 1846 1847 Notes: 1848 This can be called if you have precomputed the nonzero structure of the 1849 matrix and want to provide it to the matrix object to improve the performance 1850 of the MatSetValues() operation. 1851 1852 You MUST have set the correct numbers of nonzeros per row in the call to 1853 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1854 1855 MUST be called before any calls to MatSetValues(); 1856 1857 @*/ 1858 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1859 { 1860 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1861 1862 PetscFunctionBegin; 1863 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1864 PetscValidPointer(indices,2); 1865 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1866 if (f) { 1867 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1868 } else { 1869 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1876 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1877 { 1878 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1879 PetscErrorCode ierr; 1880 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1881 PetscReal atmp; 1882 PetscScalar *x,zero = 0.0; 1883 MatScalar *aa; 1884 PetscInt ncols,brow,krow,kcol; 1885 1886 PetscFunctionBegin; 1887 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1888 bs = A->bs; 1889 aa = a->a; 1890 ai = a->i; 1891 aj = a->j; 1892 mbs = a->mbs; 1893 1894 ierr = VecSet(v,zero);CHKERRQ(ierr); 1895 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1896 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1897 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1898 for (i=0; i<mbs; i++) { 1899 ncols = ai[1] - ai[0]; ai++; 1900 brow = bs*i; 1901 for (j=0; j<ncols; j++){ 1902 /* bcol = bs*(*aj); */ 1903 for (kcol=0; kcol<bs; kcol++){ 1904 for (krow=0; krow<bs; krow++){ 1905 atmp = PetscAbsScalar(*aa); aa++; 1906 row = brow + krow; /* row index */ 1907 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1908 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1909 } 1910 } 1911 aj++; 1912 } 1913 } 1914 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatCopy_SeqBAIJ" 1920 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1921 { 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 /* If the two matrices have the same copy implementation, use fast copy. */ 1926 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1927 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1928 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1929 1930 if (a->i[A->m] != b->i[B->m]) { 1931 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1932 } 1933 ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr); 1934 } else { 1935 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1936 } 1937 PetscFunctionReturn(0); 1938 } 1939 1940 #undef __FUNCT__ 1941 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1942 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1943 { 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1953 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1954 { 1955 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1956 PetscFunctionBegin; 1957 *array = a->a; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1963 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1964 { 1965 PetscFunctionBegin; 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #include "petscblaslapack.h" 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1972 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1973 { 1974 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1975 PetscErrorCode ierr; 1976 PetscInt i,bs=Y->bs,j,bs2; 1977 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1978 1979 PetscFunctionBegin; 1980 if (str == SAME_NONZERO_PATTERN) { 1981 PetscScalar alpha = a; 1982 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1983 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1984 if (y->xtoy && y->XtoY != X) { 1985 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1986 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1987 } 1988 if (!y->xtoy) { /* get xtoy */ 1989 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1990 y->XtoY = X; 1991 } 1992 bs2 = bs*bs; 1993 for (i=0; i<x->nz; i++) { 1994 j = 0; 1995 while (j < bs2){ 1996 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1997 j++; 1998 } 1999 } 2000 ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2001 } else { 2002 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2009 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2010 { 2011 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2012 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2013 PetscScalar *aa = a->a; 2014 2015 PetscFunctionBegin; 2016 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2022 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2023 { 2024 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2025 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2026 PetscScalar *aa = a->a; 2027 2028 PetscFunctionBegin; 2029 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 2034 /* -------------------------------------------------------------------*/ 2035 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2036 MatGetRow_SeqBAIJ, 2037 MatRestoreRow_SeqBAIJ, 2038 MatMult_SeqBAIJ_N, 2039 /* 4*/ MatMultAdd_SeqBAIJ_N, 2040 MatMultTranspose_SeqBAIJ, 2041 MatMultTransposeAdd_SeqBAIJ, 2042 MatSolve_SeqBAIJ_N, 2043 0, 2044 0, 2045 /*10*/ 0, 2046 MatLUFactor_SeqBAIJ, 2047 0, 2048 0, 2049 MatTranspose_SeqBAIJ, 2050 /*15*/ MatGetInfo_SeqBAIJ, 2051 MatEqual_SeqBAIJ, 2052 MatGetDiagonal_SeqBAIJ, 2053 MatDiagonalScale_SeqBAIJ, 2054 MatNorm_SeqBAIJ, 2055 /*20*/ 0, 2056 MatAssemblyEnd_SeqBAIJ, 2057 0, 2058 MatSetOption_SeqBAIJ, 2059 MatZeroEntries_SeqBAIJ, 2060 /*25*/ MatZeroRows_SeqBAIJ, 2061 MatLUFactorSymbolic_SeqBAIJ, 2062 MatLUFactorNumeric_SeqBAIJ_N, 2063 MatCholeskyFactorSymbolic_SeqBAIJ, 2064 MatCholeskyFactorNumeric_SeqBAIJ_N, 2065 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2066 MatILUFactorSymbolic_SeqBAIJ, 2067 MatICCFactorSymbolic_SeqBAIJ, 2068 MatGetArray_SeqBAIJ, 2069 MatRestoreArray_SeqBAIJ, 2070 /*35*/ MatDuplicate_SeqBAIJ, 2071 0, 2072 0, 2073 MatILUFactor_SeqBAIJ, 2074 0, 2075 /*40*/ MatAXPY_SeqBAIJ, 2076 MatGetSubMatrices_SeqBAIJ, 2077 MatIncreaseOverlap_SeqBAIJ, 2078 MatGetValues_SeqBAIJ, 2079 MatCopy_SeqBAIJ, 2080 /*45*/ MatPrintHelp_SeqBAIJ, 2081 MatScale_SeqBAIJ, 2082 0, 2083 0, 2084 0, 2085 /*50*/ 0, 2086 MatGetRowIJ_SeqBAIJ, 2087 MatRestoreRowIJ_SeqBAIJ, 2088 0, 2089 0, 2090 /*55*/ 0, 2091 0, 2092 0, 2093 0, 2094 MatSetValuesBlocked_SeqBAIJ, 2095 /*60*/ MatGetSubMatrix_SeqBAIJ, 2096 MatDestroy_SeqBAIJ, 2097 MatView_SeqBAIJ, 2098 MatGetPetscMaps_Petsc, 2099 0, 2100 /*65*/ 0, 2101 0, 2102 0, 2103 0, 2104 0, 2105 /*70*/ MatGetRowMax_SeqBAIJ, 2106 MatConvert_Basic, 2107 0, 2108 0, 2109 0, 2110 /*75*/ 0, 2111 0, 2112 0, 2113 0, 2114 0, 2115 /*80*/ 0, 2116 0, 2117 0, 2118 0, 2119 MatLoad_SeqBAIJ, 2120 /*85*/ 0, 2121 0, 2122 0, 2123 0, 2124 0, 2125 /*90*/ 0, 2126 0, 2127 0, 2128 0, 2129 0, 2130 /*95*/ 0, 2131 0, 2132 0, 2133 0, 2134 0, 2135 /*100*/0, 2136 0, 2137 0, 2138 0, 2139 0, 2140 /*105*/0, 2141 MatRealPart_SeqBAIJ, 2142 MatImaginaryPart_SeqBAIJ 2143 }; 2144 2145 EXTERN_C_BEGIN 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2148 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2149 { 2150 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2151 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2152 PetscErrorCode ierr; 2153 2154 PetscFunctionBegin; 2155 if (aij->nonew != 1) { 2156 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2157 } 2158 2159 /* allocate space for values if not already there */ 2160 if (!aij->saved_values) { 2161 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2162 } 2163 2164 /* copy values over */ 2165 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 EXTERN_C_END 2169 2170 EXTERN_C_BEGIN 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2173 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2174 { 2175 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2176 PetscErrorCode ierr; 2177 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2178 2179 PetscFunctionBegin; 2180 if (aij->nonew != 1) { 2181 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2182 } 2183 if (!aij->saved_values) { 2184 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2185 } 2186 2187 /* copy values over */ 2188 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2189 PetscFunctionReturn(0); 2190 } 2191 EXTERN_C_END 2192 2193 EXTERN_C_BEGIN 2194 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2195 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2196 EXTERN_C_END 2197 2198 EXTERN_C_BEGIN 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2201 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2202 { 2203 Mat_SeqBAIJ *b; 2204 PetscErrorCode ierr; 2205 PetscInt i,mbs,nbs,bs2,newbs = bs; 2206 PetscTruth flg,skipallocation = PETSC_FALSE; 2207 2208 PetscFunctionBegin; 2209 2210 if (nz == MAT_SKIP_ALLOCATION) { 2211 skipallocation = PETSC_TRUE; 2212 nz = 0; 2213 } 2214 2215 B->preallocated = PETSC_TRUE; 2216 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2217 if (nnz && newbs != bs) { 2218 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2219 } 2220 bs = newbs; 2221 2222 mbs = B->m/bs; 2223 nbs = B->n/bs; 2224 bs2 = bs*bs; 2225 2226 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2227 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2228 } 2229 2230 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2231 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2232 if (nnz) { 2233 for (i=0; i<mbs; i++) { 2234 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2235 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2236 } 2237 } 2238 2239 b = (Mat_SeqBAIJ*)B->data; 2240 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2241 B->ops->solve = MatSolve_SeqBAIJ_Update; 2242 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2243 if (!flg) { 2244 switch (bs) { 2245 case 1: 2246 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2247 B->ops->mult = MatMult_SeqBAIJ_1; 2248 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2249 break; 2250 case 2: 2251 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2252 B->ops->mult = MatMult_SeqBAIJ_2; 2253 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2254 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2255 break; 2256 case 3: 2257 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2258 B->ops->mult = MatMult_SeqBAIJ_3; 2259 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2260 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2261 break; 2262 case 4: 2263 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2264 B->ops->mult = MatMult_SeqBAIJ_4; 2265 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2266 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2267 break; 2268 case 5: 2269 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2270 B->ops->mult = MatMult_SeqBAIJ_5; 2271 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2272 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2273 break; 2274 case 6: 2275 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2276 B->ops->mult = MatMult_SeqBAIJ_6; 2277 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2278 break; 2279 case 7: 2280 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2281 B->ops->mult = MatMult_SeqBAIJ_7; 2282 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2283 break; 2284 default: 2285 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2286 B->ops->mult = MatMult_SeqBAIJ_N; 2287 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2288 break; 2289 } 2290 } 2291 B->bs = bs; 2292 b->mbs = mbs; 2293 b->nbs = nbs; 2294 if (!skipallocation) { 2295 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2296 /* b->ilen will count nonzeros in each block row so far. */ 2297 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2298 if (!nnz) { 2299 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2300 else if (nz <= 0) nz = 1; 2301 for (i=0; i<mbs; i++) b->imax[i] = nz; 2302 nz = nz*mbs; 2303 } else { 2304 nz = 0; 2305 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2306 } 2307 2308 /* allocate the matrix space */ 2309 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 2310 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2311 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2312 b->singlemalloc = PETSC_TRUE; 2313 2314 b->i[0] = 0; 2315 for (i=1; i<mbs+1; i++) { 2316 b->i[i] = b->i[i-1] + b->imax[i-1]; 2317 } 2318 } 2319 2320 B->bs = bs; 2321 b->bs2 = bs2; 2322 b->mbs = mbs; 2323 b->nz = 0; 2324 b->maxnz = nz*bs2; 2325 B->info.nz_unneeded = (PetscReal)b->maxnz; 2326 PetscFunctionReturn(0); 2327 } 2328 EXTERN_C_END 2329 2330 /*MC 2331 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2332 block sparse compressed row format. 2333 2334 Options Database Keys: 2335 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2336 2337 Level: beginner 2338 2339 .Seealso: MatCreateSeqBAIJ 2340 M*/ 2341 2342 EXTERN_C_BEGIN 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatCreate_SeqBAIJ" 2345 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2346 { 2347 PetscErrorCode ierr; 2348 PetscMPIInt size; 2349 Mat_SeqBAIJ *b; 2350 2351 PetscFunctionBegin; 2352 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2353 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2354 2355 B->m = B->M = PetscMax(B->m,B->M); 2356 B->n = B->N = PetscMax(B->n,B->N); 2357 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2358 B->data = (void*)b; 2359 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2360 B->factor = 0; 2361 B->mapping = 0; 2362 b->row = 0; 2363 b->col = 0; 2364 b->icol = 0; 2365 b->reallocs = 0; 2366 b->saved_values = 0; 2367 #if defined(PETSC_USE_MAT_SINGLE) 2368 b->setvalueslen = 0; 2369 b->setvaluescopy = PETSC_NULL; 2370 #endif 2371 2372 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2373 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2374 2375 b->sorted = PETSC_FALSE; 2376 b->roworiented = PETSC_TRUE; 2377 b->nonew = 0; 2378 b->diag = 0; 2379 b->solve_work = 0; 2380 b->mult_work = 0; 2381 B->spptr = 0; 2382 B->info.nz_unneeded = (PetscReal)b->maxnz; 2383 b->keepzeroedrows = PETSC_FALSE; 2384 b->xtoy = 0; 2385 b->XtoY = 0; 2386 b->compressedrow.use = PETSC_FALSE; 2387 b->compressedrow.nrows = 0; 2388 b->compressedrow.i = PETSC_NULL; 2389 b->compressedrow.rindex = PETSC_NULL; 2390 b->compressedrow.checked = PETSC_FALSE; 2391 B->same_nonzero = PETSC_FALSE; 2392 2393 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2394 "MatInvertBlockDiagonal_SeqBAIJ", 2395 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2396 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2397 "MatStoreValues_SeqBAIJ", 2398 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2399 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2400 "MatRetrieveValues_SeqBAIJ", 2401 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2402 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2403 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2404 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2405 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2406 "MatConvert_SeqBAIJ_SeqAIJ", 2407 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2408 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2409 "MatConvert_SeqBAIJ_SeqSBAIJ", 2410 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2412 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2413 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 EXTERN_C_END 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2420 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2421 { 2422 Mat C; 2423 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2424 PetscErrorCode ierr; 2425 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2426 2427 PetscFunctionBegin; 2428 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2429 2430 *B = 0; 2431 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2432 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 2433 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2434 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2435 c = (Mat_SeqBAIJ*)C->data; 2436 2437 C->M = A->M; 2438 C->N = A->N; 2439 C->bs = A->bs; 2440 c->bs2 = a->bs2; 2441 c->mbs = a->mbs; 2442 c->nbs = a->nbs; 2443 2444 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2445 for (i=0; i<mbs; i++) { 2446 c->imax[i] = a->imax[i]; 2447 c->ilen[i] = a->ilen[i]; 2448 } 2449 2450 /* allocate the matrix space */ 2451 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2452 c->singlemalloc = PETSC_TRUE; 2453 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2454 if (mbs > 0) { 2455 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2456 if (cpvalues == MAT_COPY_VALUES) { 2457 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2458 } else { 2459 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2460 } 2461 } 2462 c->sorted = a->sorted; 2463 c->roworiented = a->roworiented; 2464 c->nonew = a->nonew; 2465 2466 if (a->diag) { 2467 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2468 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2469 for (i=0; i<mbs; i++) { 2470 c->diag[i] = a->diag[i]; 2471 } 2472 } else c->diag = 0; 2473 c->nz = a->nz; 2474 c->maxnz = a->maxnz; 2475 c->solve_work = 0; 2476 c->mult_work = 0; 2477 C->preallocated = PETSC_TRUE; 2478 C->assembled = PETSC_TRUE; 2479 2480 c->compressedrow.use = a->compressedrow.use; 2481 c->compressedrow.nrows = a->compressedrow.nrows; 2482 c->compressedrow.checked = a->compressedrow.checked; 2483 if ( a->compressedrow.checked && a->compressedrow.use){ 2484 i = a->compressedrow.nrows; 2485 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2486 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2487 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2488 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2489 } else { 2490 c->compressedrow.use = PETSC_FALSE; 2491 c->compressedrow.i = PETSC_NULL; 2492 c->compressedrow.rindex = PETSC_NULL; 2493 } 2494 C->same_nonzero = A->same_nonzero; 2495 *B = C; 2496 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatLoad_SeqBAIJ" 2502 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2503 { 2504 Mat_SeqBAIJ *a; 2505 Mat B; 2506 PetscErrorCode ierr; 2507 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2508 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2509 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2510 PetscInt *masked,nmask,tmp,bs2,ishift; 2511 PetscMPIInt size; 2512 int fd; 2513 PetscScalar *aa; 2514 MPI_Comm comm = ((PetscObject)viewer)->comm; 2515 2516 PetscFunctionBegin; 2517 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2518 bs2 = bs*bs; 2519 2520 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2521 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2522 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2523 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2524 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2525 M = header[1]; N = header[2]; nz = header[3]; 2526 2527 if (header[3] < 0) { 2528 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2529 } 2530 2531 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2532 2533 /* 2534 This code adds extra rows to make sure the number of rows is 2535 divisible by the blocksize 2536 */ 2537 mbs = M/bs; 2538 extra_rows = bs - M + bs*(mbs); 2539 if (extra_rows == bs) extra_rows = 0; 2540 else mbs++; 2541 if (extra_rows) { 2542 ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2543 } 2544 2545 /* read in row lengths */ 2546 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2547 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2548 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2549 2550 /* read in column indices */ 2551 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2552 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2553 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2554 2555 /* loop over row lengths determining block row lengths */ 2556 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2557 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2558 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2559 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2560 masked = mask + mbs; 2561 rowcount = 0; nzcount = 0; 2562 for (i=0; i<mbs; i++) { 2563 nmask = 0; 2564 for (j=0; j<bs; j++) { 2565 kmax = rowlengths[rowcount]; 2566 for (k=0; k<kmax; k++) { 2567 tmp = jj[nzcount++]/bs; 2568 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2569 } 2570 rowcount++; 2571 } 2572 browlengths[i] += nmask; 2573 /* zero out the mask elements we set */ 2574 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2575 } 2576 2577 /* create our matrix */ 2578 ierr = MatCreate(comm,&B); 2579 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2580 ierr = MatSetType(B,type);CHKERRQ(ierr); 2581 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2582 a = (Mat_SeqBAIJ*)B->data; 2583 2584 /* set matrix "i" values */ 2585 a->i[0] = 0; 2586 for (i=1; i<= mbs; i++) { 2587 a->i[i] = a->i[i-1] + browlengths[i-1]; 2588 a->ilen[i-1] = browlengths[i-1]; 2589 } 2590 a->nz = 0; 2591 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2592 2593 /* read in nonzero values */ 2594 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2595 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2596 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2597 2598 /* set "a" and "j" values into matrix */ 2599 nzcount = 0; jcount = 0; 2600 for (i=0; i<mbs; i++) { 2601 nzcountb = nzcount; 2602 nmask = 0; 2603 for (j=0; j<bs; j++) { 2604 kmax = rowlengths[i*bs+j]; 2605 for (k=0; k<kmax; k++) { 2606 tmp = jj[nzcount++]/bs; 2607 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2608 } 2609 } 2610 /* sort the masked values */ 2611 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2612 2613 /* set "j" values into matrix */ 2614 maskcount = 1; 2615 for (j=0; j<nmask; j++) { 2616 a->j[jcount++] = masked[j]; 2617 mask[masked[j]] = maskcount++; 2618 } 2619 /* set "a" values into matrix */ 2620 ishift = bs2*a->i[i]; 2621 for (j=0; j<bs; j++) { 2622 kmax = rowlengths[i*bs+j]; 2623 for (k=0; k<kmax; k++) { 2624 tmp = jj[nzcountb]/bs ; 2625 block = mask[tmp] - 1; 2626 point = jj[nzcountb] - bs*tmp; 2627 idx = ishift + bs2*block + j + bs*point; 2628 a->a[idx] = (MatScalar)aa[nzcountb++]; 2629 } 2630 } 2631 /* zero out the mask elements we set */ 2632 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2633 } 2634 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2635 2636 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2637 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2638 ierr = PetscFree(aa);CHKERRQ(ierr); 2639 ierr = PetscFree(jj);CHKERRQ(ierr); 2640 ierr = PetscFree(mask);CHKERRQ(ierr); 2641 2642 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2643 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2644 ierr = MatView_Private(B);CHKERRQ(ierr); 2645 2646 *A = B; 2647 PetscFunctionReturn(0); 2648 } 2649 2650 #undef __FUNCT__ 2651 #define __FUNCT__ "MatCreateSeqBAIJ" 2652 /*@C 2653 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2654 compressed row) format. For good matrix assembly performance the 2655 user should preallocate the matrix storage by setting the parameter nz 2656 (or the array nnz). By setting these parameters accurately, performance 2657 during matrix assembly can be increased by more than a factor of 50. 2658 2659 Collective on MPI_Comm 2660 2661 Input Parameters: 2662 + comm - MPI communicator, set to PETSC_COMM_SELF 2663 . bs - size of block 2664 . m - number of rows 2665 . n - number of columns 2666 . nz - number of nonzero blocks per block row (same for all rows) 2667 - nnz - array containing the number of nonzero blocks in the various block rows 2668 (possibly different for each block row) or PETSC_NULL 2669 2670 Output Parameter: 2671 . A - the matrix 2672 2673 Options Database Keys: 2674 . -mat_no_unroll - uses code that does not unroll the loops in the 2675 block calculations (much slower) 2676 . -mat_block_size - size of the blocks to use 2677 2678 Level: intermediate 2679 2680 Notes: 2681 The number of rows and columns must be divisible by blocksize. 2682 2683 If the nnz parameter is given then the nz parameter is ignored 2684 2685 A nonzero block is any block that as 1 or more nonzeros in it 2686 2687 The block AIJ format is fully compatible with standard Fortran 77 2688 storage. That is, the stored row and column indices can begin at 2689 either one (as in Fortran) or zero. See the users' manual for details. 2690 2691 Specify the preallocated storage with either nz or nnz (not both). 2692 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2693 allocation. For additional details, see the users manual chapter on 2694 matrices. 2695 2696 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2697 @*/ 2698 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2699 { 2700 PetscErrorCode ierr; 2701 2702 PetscFunctionBegin; 2703 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2704 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2705 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2706 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2707 PetscFunctionReturn(0); 2708 } 2709 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2712 /*@C 2713 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2714 per row in the matrix. For good matrix assembly performance the 2715 user should preallocate the matrix storage by setting the parameter nz 2716 (or the array nnz). By setting these parameters accurately, performance 2717 during matrix assembly can be increased by more than a factor of 50. 2718 2719 Collective on MPI_Comm 2720 2721 Input Parameters: 2722 + A - the matrix 2723 . bs - size of block 2724 . nz - number of block nonzeros per block row (same for all rows) 2725 - nnz - array containing the number of block nonzeros in the various block rows 2726 (possibly different for each block row) or PETSC_NULL 2727 2728 Options Database Keys: 2729 . -mat_no_unroll - uses code that does not unroll the loops in the 2730 block calculations (much slower) 2731 . -mat_block_size - size of the blocks to use 2732 2733 Level: intermediate 2734 2735 Notes: 2736 If the nnz parameter is given then the nz parameter is ignored 2737 2738 The block AIJ format is fully compatible with standard Fortran 77 2739 storage. That is, the stored row and column indices can begin at 2740 either one (as in Fortran) or zero. See the users' manual for details. 2741 2742 Specify the preallocated storage with either nz or nnz (not both). 2743 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2744 allocation. For additional details, see the users manual chapter on 2745 matrices. 2746 2747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2748 @*/ 2749 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2750 { 2751 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2752 2753 PetscFunctionBegin; 2754 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2755 if (f) { 2756 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2757 } 2758 PetscFunctionReturn(0); 2759 } 2760 2761