1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = a->bs,mbs = a->mbs; 18 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 19 20 PetscFunctionBegin; 21 if (a->idiagvalid) PetscFunctionReturn(0); 22 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 23 diag_offset = a->diag; 24 if (!a->idiag) { 25 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 26 } 27 diag = a->idiag; 28 mdiag = a->idiag+bs*bs*mbs; 29 /* factor and invert each block */ 30 switch (a->bs){ 31 case 2: 32 for (i=0; i<mbs; i++) { 33 odiag = v + 4*diag_offset[i]; 34 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 35 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 36 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 37 diag += 4; 38 mdiag += 4; 39 } 40 break; 41 case 3: 42 for (i=0; i<mbs; i++) { 43 odiag = v + 9*diag_offset[i]; 44 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 45 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 46 diag[8] = odiag[8]; 47 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 48 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 49 mdiag[8] = odiag[8]; 50 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 51 diag += 9; 52 mdiag += 9; 53 } 54 break; 55 case 4: 56 for (i=0; i<mbs; i++) { 57 odiag = v + 16*diag_offset[i]; 58 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 59 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 61 diag += 16; 62 mdiag += 16; 63 } 64 break; 65 case 5: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 25*diag_offset[i]; 68 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 71 diag += 25; 72 mdiag += 25; 73 } 74 break; 75 default: 76 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %d",a->bs); 77 } 78 a->idiagvalid = PETSC_TRUE; 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 85 { 86 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 87 PetscScalar *x,x1,x2,s1,s2; 88 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 89 PetscErrorCode ierr; 90 PetscInt m = a->mbs,i,i2,nz,idx; 91 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 PetscErrorCode ierr; 192 PetscInt m = a->mbs,i,i2,nz,idx; 193 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 194 195 PetscFunctionBegin; 196 its = its*lits; 197 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 198 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 199 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 200 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 201 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 202 203 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 204 205 diag = a->diag; 206 idiag = a->idiag; 207 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209 210 if (flag & SOR_ZERO_INITIAL_GUESS) { 211 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 212 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 213 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 214 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 215 i2 = 3; 216 idiag += 9; 217 for (i=1; i<m; i++) { 218 v = aa + 9*ai[i]; 219 vi = aj + ai[i]; 220 nz = diag[i] - ai[i]; 221 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 222 while (nz--) { 223 idx = 3*(*vi++); 224 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 225 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 226 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 227 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 228 v += 9; 229 } 230 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 231 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 232 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 233 idiag += 9; 234 i2 += 3; 235 } 236 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 237 PetscLogFlops(9*(a->nz)); 238 } 239 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 240 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 241 i2 = 0; 242 mdiag = a->idiag+9*a->mbs; 243 for (i=0; i<m; i++) { 244 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 245 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 246 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 247 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 248 mdiag += 9; 249 i2 += 3; 250 } 251 PetscLogFlops(15*m); 252 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 253 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 256 idiag = a->idiag+9*a->mbs - 9; 257 i2 = 3*m - 3; 258 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 259 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 260 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 261 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 262 idiag -= 9; 263 i2 -= 3; 264 for (i=m-2; i>=0; i--) { 265 v = aa + 9*(diag[i]+1); 266 vi = aj + diag[i] + 1; 267 nz = ai[i+1] - diag[i] - 1; 268 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 269 while (nz--) { 270 idx = 3*(*vi++); 271 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 272 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 273 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 274 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 275 v += 9; 276 } 277 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 278 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 279 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 280 idiag -= 9; 281 i2 -= 3; 282 } 283 PetscLogFlops(9*(a->nz)); 284 } 285 } else { 286 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 287 } 288 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 289 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 296 { 297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 298 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 299 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 300 PetscErrorCode ierr; 301 PetscInt m = a->mbs,i,i2,nz,idx; 302 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 303 304 PetscFunctionBegin; 305 its = its*lits; 306 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 307 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 308 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 309 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 310 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 311 312 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 313 314 diag = a->diag; 315 idiag = a->idiag; 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 318 319 if (flag & SOR_ZERO_INITIAL_GUESS) { 320 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 321 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 322 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 323 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 324 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 325 i2 = 4; 326 idiag += 16; 327 for (i=1; i<m; i++) { 328 v = aa + 16*ai[i]; 329 vi = aj + ai[i]; 330 nz = diag[i] - ai[i]; 331 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 332 while (nz--) { 333 idx = 4*(*vi++); 334 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 335 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 336 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 337 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 338 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 339 v += 16; 340 } 341 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 342 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 343 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 344 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 345 idiag += 16; 346 i2 += 4; 347 } 348 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 349 PetscLogFlops(16*(a->nz)); 350 } 351 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 352 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 353 i2 = 0; 354 mdiag = a->idiag+16*a->mbs; 355 for (i=0; i<m; i++) { 356 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 357 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 358 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 359 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 360 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 361 mdiag += 16; 362 i2 += 4; 363 } 364 PetscLogFlops(28*m); 365 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 366 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 367 } 368 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 369 idiag = a->idiag+16*a->mbs - 16; 370 i2 = 4*m - 4; 371 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 372 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 373 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 374 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 375 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 376 idiag -= 16; 377 i2 -= 4; 378 for (i=m-2; i>=0; i--) { 379 v = aa + 16*(diag[i]+1); 380 vi = aj + diag[i] + 1; 381 nz = ai[i+1] - diag[i] - 1; 382 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 383 while (nz--) { 384 idx = 4*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 386 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 387 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 388 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 389 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 390 v += 16; 391 } 392 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 393 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 394 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 395 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 396 idiag -= 16; 397 i2 -= 4; 398 } 399 PetscLogFlops(16*(a->nz)); 400 } 401 } else { 402 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 403 } 404 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 412 { 413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 414 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 415 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 416 PetscErrorCode ierr; 417 PetscInt m = a->mbs,i,i2,nz,idx; 418 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 419 420 PetscFunctionBegin; 421 its = its*lits; 422 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 423 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 424 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 425 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 426 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 427 428 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 429 430 diag = a->diag; 431 idiag = a->idiag; 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 434 435 if (flag & SOR_ZERO_INITIAL_GUESS) { 436 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 437 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 438 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 439 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 440 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 441 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 442 i2 = 5; 443 idiag += 25; 444 for (i=1; i<m; i++) { 445 v = aa + 25*ai[i]; 446 vi = aj + ai[i]; 447 nz = diag[i] - ai[i]; 448 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 449 while (nz--) { 450 idx = 5*(*vi++); 451 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 452 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 453 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 454 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 455 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 456 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 457 v += 25; 458 } 459 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 460 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 461 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 462 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 463 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 464 idiag += 25; 465 i2 += 5; 466 } 467 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 468 PetscLogFlops(25*(a->nz)); 469 } 470 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 471 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 472 i2 = 0; 473 mdiag = a->idiag+25*a->mbs; 474 for (i=0; i<m; i++) { 475 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 476 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 477 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 478 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 479 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 480 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 481 mdiag += 25; 482 i2 += 5; 483 } 484 PetscLogFlops(45*m); 485 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 486 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 487 } 488 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 489 idiag = a->idiag+25*a->mbs - 25; 490 i2 = 5*m - 5; 491 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 492 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 493 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 494 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 495 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 496 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 497 idiag -= 25; 498 i2 -= 5; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 25*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 504 while (nz--) { 505 idx = 5*(*vi++); 506 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 507 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 508 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 509 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 510 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 511 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 512 v += 25; 513 } 514 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 515 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 516 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 517 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 518 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 519 idiag -= 25; 520 i2 -= 5; 521 } 522 PetscLogFlops(25*(a->nz)); 523 } 524 } else { 525 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 526 } 527 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 528 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 529 PetscFunctionReturn(0); 530 } 531 532 /* 533 Special version for Fun3d sequential benchmark 534 */ 535 #if defined(PETSC_HAVE_FORTRAN_CAPS) 536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 538 #define matsetvaluesblocked4_ matsetvaluesblocked4 539 #endif 540 541 EXTERN_C_BEGIN 542 #undef __FUNCT__ 543 #define __FUNCT__ "matsetvaluesblocked4_" 544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 545 { 546 Mat A = *AA; 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 549 PetscInt *ai=a->i,*ailen=a->ilen; 550 PetscInt *aj=a->j,stepval; 551 const PetscScalar *value = v; 552 MatScalar *ap,*aa = a->a,*bap; 553 554 PetscFunctionBegin; 555 stepval = (n-1)*4; 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 rp = aj + ai[row]; 559 ap = aa + 16*ai[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 col = in[l]; 564 value = v + k*(stepval+4)*4 + l*4; 565 low = 0; high = nrow; 566 while (high-low > 7) { 567 t = (low+high)/2; 568 if (rp[t] > col) high = t; 569 else low = t; 570 } 571 for (i=low; i<high; i++) { 572 if (rp[i] > col) break; 573 if (rp[i] == col) { 574 bap = ap + 16*i; 575 for (ii=0; ii<4; ii++,value+=stepval) { 576 for (jj=ii; jj<16; jj+=4) { 577 bap[jj] += *value++; 578 } 579 } 580 goto noinsert2; 581 } 582 } 583 N = nrow++ - 1; 584 /* shift up all the later entries in this row */ 585 for (ii=N; ii>=i; ii--) { 586 rp[ii+1] = rp[ii]; 587 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 588 } 589 if (N >= i) { 590 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 591 } 592 rp[i] = col; 593 bap = ap + 16*i; 594 for (ii=0; ii<4; ii++,value+=stepval) { 595 for (jj=ii; jj<16; jj+=4) { 596 bap[jj] = *value++; 597 } 598 } 599 noinsert2:; 600 low = i; 601 } 602 ailen[row] = nrow; 603 } 604 } 605 EXTERN_C_END 606 607 #if defined(PETSC_HAVE_FORTRAN_CAPS) 608 #define matsetvalues4_ MATSETVALUES4 609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 610 #define matsetvalues4_ matsetvalues4 611 #endif 612 613 EXTERN_C_BEGIN 614 #undef __FUNCT__ 615 #define __FUNCT__ "MatSetValues4_" 616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 617 { 618 Mat A = *AA; 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 620 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 621 PetscInt *ai=a->i,*ailen=a->ilen; 622 PetscInt *aj=a->j,brow,bcol; 623 PetscInt ridx,cidx; 624 MatScalar *ap,value,*aa=a->a,*bap; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over added rows */ 628 row = im[k]; brow = row/4; 629 rp = aj + ai[brow]; 630 ap = aa + 16*ai[brow]; 631 nrow = ailen[brow]; 632 low = 0; 633 for (l=0; l<n; l++) { /* loop over added columns */ 634 col = in[l]; bcol = col/4; 635 ridx = row % 4; cidx = col % 4; 636 value = v[l + k*n]; 637 low = 0; high = nrow; 638 while (high-low > 7) { 639 t = (low+high)/2; 640 if (rp[t] > bcol) high = t; 641 else low = t; 642 } 643 for (i=low; i<high; i++) { 644 if (rp[i] > bcol) break; 645 if (rp[i] == bcol) { 646 bap = ap + 16*i + 4*cidx + ridx; 647 *bap += value; 648 goto noinsert1; 649 } 650 } 651 N = nrow++ - 1; 652 /* shift up all the later entries in this row */ 653 for (ii=N; ii>=i; ii--) { 654 rp[ii+1] = rp[ii]; 655 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 656 } 657 if (N>=i) { 658 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 659 } 660 rp[i] = bcol; 661 ap[16*i + 4*cidx + ridx] = value; 662 noinsert1:; 663 low = i; 664 } 665 ailen[brow] = nrow; 666 } 667 } 668 EXTERN_C_END 669 670 /* UGLY, ugly, ugly 671 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 672 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 673 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 674 converts the entries into single precision and then calls ..._MatScalar() to put them 675 into the single precision data structures. 676 */ 677 #if defined(PETSC_USE_MAT_SINGLE) 678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 679 #else 680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 681 #endif 682 683 #define CHUNKSIZE 10 684 685 /* 686 Checks for missing diagonals 687 */ 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt *diag,*jj = a->j,i; 695 696 PetscFunctionBegin; 697 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 698 diag = a->diag; 699 for (i=0; i<a->mbs; i++) { 700 if (jj[diag[i]] != i) { 701 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %d",i); 702 } 703 } 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 710 { 711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscInt i,j,*diag,m = a->mbs; 714 715 PetscFunctionBegin; 716 if (a->diag) PetscFunctionReturn(0); 717 718 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 719 PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt)); 720 for (i=0; i<m; i++) { 721 diag[i] = a->i[i+1]; 722 for (j=a->i[i]; j<a->i[i+1]; j++) { 723 if (a->j[j] == i) { 724 diag[i] = j; 725 break; 726 } 727 } 728 } 729 a->diag = diag; 730 PetscFunctionReturn(0); 731 } 732 733 734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 739 { 740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 741 PetscErrorCode ierr; 742 PetscInt n = a->mbs,i; 743 744 PetscFunctionBegin; 745 *nn = n; 746 if (!ia) PetscFunctionReturn(0); 747 if (symmetric) { 748 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 749 } else if (oshift == 1) { 750 /* temporarily add 1 to i and j indices */ 751 PetscInt nz = a->i[n]; 752 for (i=0; i<nz; i++) a->j[i]++; 753 for (i=0; i<n+1; i++) a->i[i]++; 754 *ia = a->i; *ja = a->j; 755 } else { 756 *ia = a->i; *ja = a->j; 757 } 758 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 765 { 766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 767 PetscErrorCode ierr; 768 PetscInt i,n = a->mbs; 769 770 PetscFunctionBegin; 771 if (!ia) PetscFunctionReturn(0); 772 if (symmetric) { 773 ierr = PetscFree(*ia);CHKERRQ(ierr); 774 ierr = PetscFree(*ja);CHKERRQ(ierr); 775 } else if (oshift == 1) { 776 PetscInt nz = a->i[n]-1; 777 for (i=0; i<nz; i++) a->j[i]--; 778 for (i=0; i<n+1; i++) a->i[i]--; 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 785 PetscErrorCode MatGetBlockSize_SeqBAIJ(Mat mat,PetscInt *bs) 786 { 787 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 788 789 PetscFunctionBegin; 790 *bs = baij->bs; 791 PetscFunctionReturn(0); 792 } 793 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatDestroy_SeqBAIJ" 797 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 798 { 799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 800 PetscErrorCode ierr; 801 802 PetscFunctionBegin; 803 #if defined(PETSC_USE_LOG) 804 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 805 #endif 806 ierr = PetscFree(a->a);CHKERRQ(ierr); 807 if (!a->singlemalloc) { 808 ierr = PetscFree(a->i);CHKERRQ(ierr); 809 ierr = PetscFree(a->j);CHKERRQ(ierr); 810 } 811 if (a->row) { 812 ierr = ISDestroy(a->row);CHKERRQ(ierr); 813 } 814 if (a->col) { 815 ierr = ISDestroy(a->col);CHKERRQ(ierr); 816 } 817 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 818 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 819 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 820 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 821 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 822 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 823 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 824 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 825 #if defined(PETSC_USE_MAT_SINGLE) 826 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 827 #endif 828 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 829 830 ierr = PetscFree(a);CHKERRQ(ierr); 831 832 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 833 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 834 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 835 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 836 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 837 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatSetOption_SeqBAIJ" 843 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 844 { 845 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 846 847 PetscFunctionBegin; 848 switch (op) { 849 case MAT_ROW_ORIENTED: 850 a->roworiented = PETSC_TRUE; 851 break; 852 case MAT_COLUMN_ORIENTED: 853 a->roworiented = PETSC_FALSE; 854 break; 855 case MAT_COLUMNS_SORTED: 856 a->sorted = PETSC_TRUE; 857 break; 858 case MAT_COLUMNS_UNSORTED: 859 a->sorted = PETSC_FALSE; 860 break; 861 case MAT_KEEP_ZEROED_ROWS: 862 a->keepzeroedrows = PETSC_TRUE; 863 break; 864 case MAT_NO_NEW_NONZERO_LOCATIONS: 865 a->nonew = 1; 866 break; 867 case MAT_NEW_NONZERO_LOCATION_ERR: 868 a->nonew = -1; 869 break; 870 case MAT_NEW_NONZERO_ALLOCATION_ERR: 871 a->nonew = -2; 872 break; 873 case MAT_YES_NEW_NONZERO_LOCATIONS: 874 a->nonew = 0; 875 break; 876 case MAT_ROWS_SORTED: 877 case MAT_ROWS_UNSORTED: 878 case MAT_YES_NEW_DIAGONALS: 879 case MAT_IGNORE_OFF_PROC_ENTRIES: 880 case MAT_USE_HASH_TABLE: 881 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 882 break; 883 case MAT_NO_NEW_DIAGONALS: 884 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 885 case MAT_SYMMETRIC: 886 case MAT_STRUCTURALLY_SYMMETRIC: 887 case MAT_NOT_SYMMETRIC: 888 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 889 case MAT_HERMITIAN: 890 case MAT_NOT_HERMITIAN: 891 case MAT_SYMMETRY_ETERNAL: 892 case MAT_NOT_SYMMETRY_ETERNAL: 893 break; 894 default: 895 SETERRQ(PETSC_ERR_SUP,"unknown option"); 896 } 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatGetRow_SeqBAIJ" 902 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 903 { 904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 907 MatScalar *aa,*aa_i; 908 PetscScalar *v_i; 909 910 PetscFunctionBegin; 911 bs = a->bs; 912 ai = a->i; 913 aj = a->j; 914 aa = a->a; 915 bs2 = a->bs2; 916 917 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 918 919 bn = row/bs; /* Block number */ 920 bp = row % bs; /* Block Position */ 921 M = ai[bn+1] - ai[bn]; 922 *nz = bs*M; 923 924 if (v) { 925 *v = 0; 926 if (*nz) { 927 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 928 for (i=0; i<M; i++) { /* for each block in the block row */ 929 v_i = *v + i*bs; 930 aa_i = aa + bs2*(ai[bn] + i); 931 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 932 } 933 } 934 } 935 936 if (idx) { 937 *idx = 0; 938 if (*nz) { 939 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 940 for (i=0; i<M; i++) { /* for each block in the block row */ 941 idx_i = *idx + i*bs; 942 itmp = bs*aj[ai[bn] + i]; 943 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 944 } 945 } 946 } 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 952 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 953 { 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 958 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatTranspose_SeqBAIJ" 964 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 965 { 966 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 967 Mat C; 968 PetscErrorCode ierr; 969 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 970 PetscInt *rows,*cols,bs2=a->bs2; 971 PetscScalar *array; 972 973 PetscFunctionBegin; 974 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 975 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 976 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 977 978 #if defined(PETSC_USE_MAT_SINGLE) 979 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 980 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 981 #else 982 array = a->a; 983 #endif 984 985 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 986 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 987 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 988 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 989 ierr = PetscFree(col);CHKERRQ(ierr); 990 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 991 cols = rows + bs; 992 for (i=0; i<mbs; i++) { 993 cols[0] = i*bs; 994 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 995 len = ai[i+1] - ai[i]; 996 for (j=0; j<len; j++) { 997 rows[0] = (*aj++)*bs; 998 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 999 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1000 array += bs2; 1001 } 1002 } 1003 ierr = PetscFree(rows);CHKERRQ(ierr); 1004 #if defined(PETSC_USE_MAT_SINGLE) 1005 ierr = PetscFree(array); 1006 #endif 1007 1008 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1009 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1010 1011 if (B) { 1012 *B = C; 1013 } else { 1014 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1015 } 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1021 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1022 { 1023 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1024 PetscErrorCode ierr; 1025 PetscInt i,fd,*col_lens,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1026 PetscScalar *aa; 1027 FILE *file; 1028 1029 PetscFunctionBegin; 1030 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1031 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1032 col_lens[0] = MAT_FILE_COOKIE; 1033 1034 col_lens[1] = A->m; 1035 col_lens[2] = A->n; 1036 col_lens[3] = a->nz*bs2; 1037 1038 /* store lengths of each row and write (including header) to file */ 1039 count = 0; 1040 for (i=0; i<a->mbs; i++) { 1041 for (j=0; j<bs; j++) { 1042 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1043 } 1044 } 1045 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1046 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1047 1048 /* store column indices (zero start index) */ 1049 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1050 count = 0; 1051 for (i=0; i<a->mbs; i++) { 1052 for (j=0; j<bs; j++) { 1053 for (k=a->i[i]; k<a->i[i+1]; k++) { 1054 for (l=0; l<bs; l++) { 1055 jj[count++] = bs*a->j[k] + l; 1056 } 1057 } 1058 } 1059 } 1060 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1061 ierr = PetscFree(jj);CHKERRQ(ierr); 1062 1063 /* store nonzero values */ 1064 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1065 count = 0; 1066 for (i=0; i<a->mbs; i++) { 1067 for (j=0; j<bs; j++) { 1068 for (k=a->i[i]; k<a->i[i+1]; k++) { 1069 for (l=0; l<bs; l++) { 1070 aa[count++] = a->a[bs2*k + l*bs + j]; 1071 } 1072 } 1073 } 1074 } 1075 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1076 ierr = PetscFree(aa);CHKERRQ(ierr); 1077 1078 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1079 if (file) { 1080 fprintf(file,"-matload_block_size %d\n",a->bs); 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1087 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1088 { 1089 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt i,j,bs = a->bs,k,l,bs2=a->bs2; 1092 PetscViewerFormat format; 1093 1094 PetscFunctionBegin; 1095 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1096 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1097 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1098 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1099 Mat aij; 1100 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1101 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1102 ierr = MatDestroy(aij);CHKERRQ(ierr); 1103 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1104 PetscFunctionReturn(0); 1105 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1106 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1107 for (i=0; i<a->mbs; i++) { 1108 for (j=0; j<bs; j++) { 1109 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1110 for (k=a->i[i]; k<a->i[i+1]; k++) { 1111 for (l=0; l<bs; l++) { 1112 #if defined(PETSC_USE_COMPLEX) 1113 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1114 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 1115 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1116 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1117 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 1118 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1119 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1120 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1121 } 1122 #else 1123 if (a->a[bs2*k + l*bs + j] != 0.0) { 1124 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1125 } 1126 #endif 1127 } 1128 } 1129 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1130 } 1131 } 1132 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1133 } else { 1134 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1135 for (i=0; i<a->mbs; i++) { 1136 for (j=0; j<bs; j++) { 1137 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1138 for (k=a->i[i]; k<a->i[i+1]; k++) { 1139 for (l=0; l<bs; l++) { 1140 #if defined(PETSC_USE_COMPLEX) 1141 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1142 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 1143 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1144 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1145 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 1146 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1147 } else { 1148 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } 1150 #else 1151 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1152 #endif 1153 } 1154 } 1155 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1156 } 1157 } 1158 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1159 } 1160 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1166 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1167 { 1168 Mat A = (Mat) Aa; 1169 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1170 PetscErrorCode ierr; 1171 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1172 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1173 MatScalar *aa; 1174 PetscViewer viewer; 1175 1176 PetscFunctionBegin; 1177 1178 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1179 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1180 1181 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1182 1183 /* loop over matrix elements drawing boxes */ 1184 color = PETSC_DRAW_BLUE; 1185 for (i=0,row=0; i<mbs; i++,row+=bs) { 1186 for (j=a->i[i]; j<a->i[i+1]; j++) { 1187 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1188 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1189 aa = a->a + j*bs2; 1190 for (k=0; k<bs; k++) { 1191 for (l=0; l<bs; l++) { 1192 if (PetscRealPart(*aa++) >= 0.) continue; 1193 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1194 } 1195 } 1196 } 1197 } 1198 color = PETSC_DRAW_CYAN; 1199 for (i=0,row=0; i<mbs; i++,row+=bs) { 1200 for (j=a->i[i]; j<a->i[i+1]; j++) { 1201 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1202 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1203 aa = a->a + j*bs2; 1204 for (k=0; k<bs; k++) { 1205 for (l=0; l<bs; l++) { 1206 if (PetscRealPart(*aa++) != 0.) continue; 1207 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1208 } 1209 } 1210 } 1211 } 1212 1213 color = PETSC_DRAW_RED; 1214 for (i=0,row=0; i<mbs; i++,row+=bs) { 1215 for (j=a->i[i]; j<a->i[i+1]; j++) { 1216 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1217 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1218 aa = a->a + j*bs2; 1219 for (k=0; k<bs; k++) { 1220 for (l=0; l<bs; l++) { 1221 if (PetscRealPart(*aa++) <= 0.) continue; 1222 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1223 } 1224 } 1225 } 1226 } 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1232 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1233 { 1234 PetscErrorCode ierr; 1235 PetscReal xl,yl,xr,yr,w,h; 1236 PetscDraw draw; 1237 PetscTruth isnull; 1238 1239 PetscFunctionBegin; 1240 1241 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1242 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1243 1244 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1245 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1246 xr += w; yr += h; xl = -w; yl = -h; 1247 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1248 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1249 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatView_SeqBAIJ" 1255 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1256 { 1257 PetscErrorCode ierr; 1258 PetscTruth iascii,isbinary,isdraw; 1259 1260 PetscFunctionBegin; 1261 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1262 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1263 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1264 if (iascii){ 1265 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1266 } else if (isbinary) { 1267 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1268 } else if (isdraw) { 1269 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1270 } else { 1271 Mat B; 1272 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1273 ierr = MatView(B,viewer);CHKERRQ(ierr); 1274 ierr = MatDestroy(B);CHKERRQ(ierr); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1282 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1283 { 1284 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1285 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1286 PetscInt *ai = a->i,*ailen = a->ilen; 1287 PetscInt brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1288 MatScalar *ap,*aa = a->a,zero = 0.0; 1289 1290 PetscFunctionBegin; 1291 for (k=0; k<m; k++) { /* loop over rows */ 1292 row = im[k]; brow = row/bs; 1293 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1294 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 1295 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1296 nrow = ailen[brow]; 1297 for (l=0; l<n; l++) { /* loop over columns */ 1298 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1299 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 1300 col = in[l] ; 1301 bcol = col/bs; 1302 cidx = col%bs; 1303 ridx = row%bs; 1304 high = nrow; 1305 low = 0; /* assume unsorted */ 1306 while (high-low > 5) { 1307 t = (low+high)/2; 1308 if (rp[t] > bcol) high = t; 1309 else low = t; 1310 } 1311 for (i=low; i<high; i++) { 1312 if (rp[i] > bcol) break; 1313 if (rp[i] == bcol) { 1314 *v++ = ap[bs2*i+bs*cidx+ridx]; 1315 goto finished; 1316 } 1317 } 1318 *v++ = zero; 1319 finished:; 1320 } 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #if defined(PETSC_USE_MAT_SINGLE) 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1328 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1329 { 1330 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1331 PetscErrorCode ierr; 1332 PetscInt i,N = m*n*b->bs2; 1333 MatScalar *vsingle; 1334 1335 PetscFunctionBegin; 1336 if (N > b->setvalueslen) { 1337 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1338 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1339 b->setvalueslen = N; 1340 } 1341 vsingle = b->setvaluescopy; 1342 for (i=0; i<N; i++) { 1343 vsingle[i] = v[i]; 1344 } 1345 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1346 PetscFunctionReturn(0); 1347 } 1348 #endif 1349 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1353 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1354 { 1355 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1356 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1357 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1358 PetscErrorCode ierr; 1359 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 1360 PetscTruth roworiented=a->roworiented; 1361 const MatScalar *value = v; 1362 MatScalar *ap,*aa = a->a,*bap; 1363 1364 PetscFunctionBegin; 1365 if (roworiented) { 1366 stepval = (n-1)*bs; 1367 } else { 1368 stepval = (m-1)*bs; 1369 } 1370 for (k=0; k<m; k++) { /* loop over added rows */ 1371 row = im[k]; 1372 if (row < 0) continue; 1373 #if defined(PETSC_USE_BOPT_g) 1374 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 1375 #endif 1376 rp = aj + ai[row]; 1377 ap = aa + bs2*ai[row]; 1378 rmax = imax[row]; 1379 nrow = ailen[row]; 1380 low = 0; 1381 for (l=0; l<n; l++) { /* loop over added columns */ 1382 if (in[l] < 0) continue; 1383 #if defined(PETSC_USE_BOPT_g) 1384 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 1385 #endif 1386 col = in[l]; 1387 if (roworiented) { 1388 value = v + k*(stepval+bs)*bs + l*bs; 1389 } else { 1390 value = v + l*(stepval+bs)*bs + k*bs; 1391 } 1392 if (!sorted) low = 0; high = nrow; 1393 while (high-low > 7) { 1394 t = (low+high)/2; 1395 if (rp[t] > col) high = t; 1396 else low = t; 1397 } 1398 for (i=low; i<high; i++) { 1399 if (rp[i] > col) break; 1400 if (rp[i] == col) { 1401 bap = ap + bs2*i; 1402 if (roworiented) { 1403 if (is == ADD_VALUES) { 1404 for (ii=0; ii<bs; ii++,value+=stepval) { 1405 for (jj=ii; jj<bs2; jj+=bs) { 1406 bap[jj] += *value++; 1407 } 1408 } 1409 } else { 1410 for (ii=0; ii<bs; ii++,value+=stepval) { 1411 for (jj=ii; jj<bs2; jj+=bs) { 1412 bap[jj] = *value++; 1413 } 1414 } 1415 } 1416 } else { 1417 if (is == ADD_VALUES) { 1418 for (ii=0; ii<bs; ii++,value+=stepval) { 1419 for (jj=0; jj<bs; jj++) { 1420 *bap++ += *value++; 1421 } 1422 } 1423 } else { 1424 for (ii=0; ii<bs; ii++,value+=stepval) { 1425 for (jj=0; jj<bs; jj++) { 1426 *bap++ = *value++; 1427 } 1428 } 1429 } 1430 } 1431 goto noinsert2; 1432 } 1433 } 1434 if (nonew == 1) goto noinsert2; 1435 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1436 if (nrow >= rmax) { 1437 /* there is no extra room in row, therefore enlarge */ 1438 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1439 MatScalar *new_a; 1440 1441 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1442 1443 /* malloc new storage space */ 1444 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1445 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1446 new_j = (int*)(new_a + bs2*new_nz); 1447 new_i = new_j + new_nz; 1448 1449 /* copy over old data into new slots */ 1450 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1451 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1452 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1453 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1454 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1455 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1456 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1457 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1458 /* free up old matrix storage */ 1459 ierr = PetscFree(a->a);CHKERRQ(ierr); 1460 if (!a->singlemalloc) { 1461 ierr = PetscFree(a->i);CHKERRQ(ierr); 1462 ierr = PetscFree(a->j);CHKERRQ(ierr); 1463 } 1464 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1465 a->singlemalloc = PETSC_TRUE; 1466 1467 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1468 rmax = imax[row] = imax[row] + CHUNKSIZE; 1469 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 1470 a->maxnz += bs2*CHUNKSIZE; 1471 a->reallocs++; 1472 a->nz++; 1473 } 1474 N = nrow++ - 1; 1475 /* shift up all the later entries in this row */ 1476 for (ii=N; ii>=i; ii--) { 1477 rp[ii+1] = rp[ii]; 1478 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1479 } 1480 if (N >= i) { 1481 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1482 } 1483 rp[i] = col; 1484 bap = ap + bs2*i; 1485 if (roworiented) { 1486 for (ii=0; ii<bs; ii++,value+=stepval) { 1487 for (jj=ii; jj<bs2; jj+=bs) { 1488 bap[jj] = *value++; 1489 } 1490 } 1491 } else { 1492 for (ii=0; ii<bs; ii++,value+=stepval) { 1493 for (jj=0; jj<bs; jj++) { 1494 *bap++ = *value++; 1495 } 1496 } 1497 } 1498 noinsert2:; 1499 low = i; 1500 } 1501 ailen[row] = nrow; 1502 } 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1508 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1509 { 1510 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1511 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1512 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1513 PetscErrorCode ierr; 1514 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1515 MatScalar *aa = a->a,*ap; 1516 1517 PetscFunctionBegin; 1518 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1519 1520 if (m) rmax = ailen[0]; 1521 for (i=1; i<mbs; i++) { 1522 /* move each row back by the amount of empty slots (fshift) before it*/ 1523 fshift += imax[i-1] - ailen[i-1]; 1524 rmax = PetscMax(rmax,ailen[i]); 1525 if (fshift) { 1526 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1527 N = ailen[i]; 1528 for (j=0; j<N; j++) { 1529 ip[j-fshift] = ip[j]; 1530 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1531 } 1532 } 1533 ai[i] = ai[i-1] + ailen[i-1]; 1534 } 1535 if (mbs) { 1536 fshift += imax[mbs-1] - ailen[mbs-1]; 1537 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1538 } 1539 /* reset ilen and imax for each row */ 1540 for (i=0; i<mbs; i++) { 1541 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1542 } 1543 a->nz = ai[mbs]; 1544 1545 /* diagonals may have moved, so kill the diagonal pointers */ 1546 a->idiagvalid = PETSC_FALSE; 1547 if (fshift && a->diag) { 1548 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1549 PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt)); 1550 a->diag = 0; 1551 } 1552 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1553 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1554 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1555 a->reallocs = 0; 1556 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1557 1558 PetscFunctionReturn(0); 1559 } 1560 1561 1562 1563 /* 1564 This function returns an array of flags which indicate the locations of contiguous 1565 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1566 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1567 Assume: sizes should be long enough to hold all the values. 1568 */ 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1571 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1572 { 1573 PetscInt i,j,k,row; 1574 PetscTruth flg; 1575 1576 PetscFunctionBegin; 1577 for (i=0,j=0; i<n; j++) { 1578 row = idx[i]; 1579 if (row%bs!=0) { /* Not the begining of a block */ 1580 sizes[j] = 1; 1581 i++; 1582 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1583 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1584 i++; 1585 } else { /* Begining of the block, so check if the complete block exists */ 1586 flg = PETSC_TRUE; 1587 for (k=1; k<bs; k++) { 1588 if (row+k != idx[i+k]) { /* break in the block */ 1589 flg = PETSC_FALSE; 1590 break; 1591 } 1592 } 1593 if (flg == PETSC_TRUE) { /* No break in the bs */ 1594 sizes[j] = bs; 1595 i+= bs; 1596 } else { 1597 sizes[j] = 1; 1598 i++; 1599 } 1600 } 1601 } 1602 *bs_max = j; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1608 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1609 { 1610 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1611 PetscErrorCode ierr; 1612 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1613 PetscInt bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1614 PetscScalar zero = 0.0; 1615 MatScalar *aa; 1616 1617 PetscFunctionBegin; 1618 /* Make a copy of the IS and sort it */ 1619 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1620 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1621 1622 /* allocate memory for rows,sizes */ 1623 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1624 sizes = rows + is_n; 1625 1626 /* copy IS values to rows, and sort them */ 1627 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1628 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1629 if (baij->keepzeroedrows) { 1630 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1631 bs_max = is_n; 1632 } else { 1633 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1634 } 1635 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1636 1637 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1638 row = rows[j]; 1639 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1640 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1641 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1642 if (sizes[i] == bs && !baij->keepzeroedrows) { 1643 if (diag) { 1644 if (baij->ilen[row/bs] > 0) { 1645 baij->ilen[row/bs] = 1; 1646 baij->j[baij->i[row/bs]] = row/bs; 1647 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1648 } 1649 /* Now insert all the diagonal values for this bs */ 1650 for (k=0; k<bs; k++) { 1651 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 } else { /* (!diag) */ 1654 baij->ilen[row/bs] = 0; 1655 } /* end (!diag) */ 1656 } else { /* (sizes[i] != bs) */ 1657 #if defined (PETSC_USE_DEBUG) 1658 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1659 #endif 1660 for (k=0; k<count; k++) { 1661 aa[0] = zero; 1662 aa += bs; 1663 } 1664 if (diag) { 1665 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1666 } 1667 } 1668 } 1669 1670 ierr = PetscFree(rows);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1678 { 1679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1680 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1681 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1682 PetscInt *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1683 PetscErrorCode ierr; 1684 PetscInt ridx,cidx,bs2=a->bs2; 1685 PetscTruth roworiented=a->roworiented; 1686 MatScalar *ap,value,*aa=a->a,*bap; 1687 1688 PetscFunctionBegin; 1689 for (k=0; k<m; k++) { /* loop over added rows */ 1690 row = im[k]; brow = row/bs; 1691 if (row < 0) continue; 1692 #if defined(PETSC_USE_BOPT_g) 1693 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1694 #endif 1695 rp = aj + ai[brow]; 1696 ap = aa + bs2*ai[brow]; 1697 rmax = imax[brow]; 1698 nrow = ailen[brow]; 1699 low = 0; 1700 for (l=0; l<n; l++) { /* loop over added columns */ 1701 if (in[l] < 0) continue; 1702 #if defined(PETSC_USE_BOPT_g) 1703 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1704 #endif 1705 col = in[l]; bcol = col/bs; 1706 ridx = row % bs; cidx = col % bs; 1707 if (roworiented) { 1708 value = v[l + k*n]; 1709 } else { 1710 value = v[k + l*m]; 1711 } 1712 if (!sorted) low = 0; high = nrow; 1713 while (high-low > 7) { 1714 t = (low+high)/2; 1715 if (rp[t] > bcol) high = t; 1716 else low = t; 1717 } 1718 for (i=low; i<high; i++) { 1719 if (rp[i] > bcol) break; 1720 if (rp[i] == bcol) { 1721 bap = ap + bs2*i + bs*cidx + ridx; 1722 if (is == ADD_VALUES) *bap += value; 1723 else *bap = value; 1724 goto noinsert1; 1725 } 1726 } 1727 if (nonew == 1) goto noinsert1; 1728 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1729 if (nrow >= rmax) { 1730 /* there is no extra room in row, therefore enlarge */ 1731 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1732 MatScalar *new_a; 1733 1734 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1735 1736 /* Malloc new storage space */ 1737 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1738 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1739 new_j = (PetscInt*)(new_a + bs2*new_nz); 1740 new_i = new_j + new_nz; 1741 1742 /* copy over old data into new slots */ 1743 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1744 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1745 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1746 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1747 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1748 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1749 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1750 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1751 /* free up old matrix storage */ 1752 ierr = PetscFree(a->a);CHKERRQ(ierr); 1753 if (!a->singlemalloc) { 1754 ierr = PetscFree(a->i);CHKERRQ(ierr); 1755 ierr = PetscFree(a->j);CHKERRQ(ierr); 1756 } 1757 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1758 a->singlemalloc = PETSC_TRUE; 1759 1760 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1761 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1762 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 1763 a->maxnz += bs2*CHUNKSIZE; 1764 a->reallocs++; 1765 a->nz++; 1766 } 1767 N = nrow++ - 1; 1768 /* shift up all the later entries in this row */ 1769 for (ii=N; ii>=i; ii--) { 1770 rp[ii+1] = rp[ii]; 1771 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1772 } 1773 if (N>=i) { 1774 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1775 } 1776 rp[i] = bcol; 1777 ap[bs2*i + bs*cidx + ridx] = value; 1778 noinsert1:; 1779 low = i; 1780 } 1781 ailen[brow] = nrow; 1782 } 1783 PetscFunctionReturn(0); 1784 } 1785 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1789 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1790 { 1791 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1792 Mat outA; 1793 PetscErrorCode ierr; 1794 PetscTruth row_identity,col_identity; 1795 1796 PetscFunctionBegin; 1797 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1798 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1799 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1800 if (!row_identity || !col_identity) { 1801 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1802 } 1803 1804 outA = inA; 1805 inA->factor = FACTOR_LU; 1806 1807 if (!a->diag) { 1808 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1809 } 1810 1811 a->row = row; 1812 a->col = col; 1813 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1814 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1815 1816 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1817 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1818 PetscLogObjectParent(inA,a->icol); 1819 1820 /* 1821 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1822 for ILU(0) factorization with natural ordering 1823 */ 1824 if (a->bs < 8) { 1825 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1826 } else { 1827 if (!a->solve_work) { 1828 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1829 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1830 } 1831 } 1832 1833 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1834 1835 PetscFunctionReturn(0); 1836 } 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1839 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1840 { 1841 static PetscTruth called = PETSC_FALSE; 1842 MPI_Comm comm = A->comm; 1843 PetscErrorCode ierr; 1844 1845 PetscFunctionBegin; 1846 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1847 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1848 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 EXTERN_C_BEGIN 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1855 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1856 { 1857 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1858 PetscInt i,nz,nbs; 1859 1860 PetscFunctionBegin; 1861 nz = baij->maxnz/baij->bs2; 1862 nbs = baij->nbs; 1863 for (i=0; i<nz; i++) { 1864 baij->j[i] = indices[i]; 1865 } 1866 baij->nz = nz; 1867 for (i=0; i<nbs; i++) { 1868 baij->ilen[i] = baij->imax[i]; 1869 } 1870 1871 PetscFunctionReturn(0); 1872 } 1873 EXTERN_C_END 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1877 /*@ 1878 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1879 in the matrix. 1880 1881 Input Parameters: 1882 + mat - the SeqBAIJ matrix 1883 - indices - the column indices 1884 1885 Level: advanced 1886 1887 Notes: 1888 This can be called if you have precomputed the nonzero structure of the 1889 matrix and want to provide it to the matrix object to improve the performance 1890 of the MatSetValues() operation. 1891 1892 You MUST have set the correct numbers of nonzeros per row in the call to 1893 MatCreateSeqBAIJ(). 1894 1895 MUST be called before any calls to MatSetValues(); 1896 1897 @*/ 1898 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1899 { 1900 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1901 1902 PetscFunctionBegin; 1903 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1904 PetscValidPointer(indices,2); 1905 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1906 if (f) { 1907 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1908 } else { 1909 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1916 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1917 { 1918 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1919 PetscErrorCode ierr; 1920 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1921 PetscReal atmp; 1922 PetscScalar *x,zero = 0.0; 1923 MatScalar *aa; 1924 PetscInt ncols,brow,krow,kcol; 1925 1926 PetscFunctionBegin; 1927 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1928 bs = a->bs; 1929 aa = a->a; 1930 ai = a->i; 1931 aj = a->j; 1932 mbs = a->mbs; 1933 1934 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1935 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1936 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1937 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1938 for (i=0; i<mbs; i++) { 1939 ncols = ai[1] - ai[0]; ai++; 1940 brow = bs*i; 1941 for (j=0; j<ncols; j++){ 1942 /* bcol = bs*(*aj); */ 1943 for (kcol=0; kcol<bs; kcol++){ 1944 for (krow=0; krow<bs; krow++){ 1945 atmp = PetscAbsScalar(*aa); aa++; 1946 row = brow + krow; /* row index */ 1947 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1948 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1949 } 1950 } 1951 aj++; 1952 } 1953 } 1954 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1960 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1961 { 1962 PetscErrorCode ierr; 1963 1964 PetscFunctionBegin; 1965 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #undef __FUNCT__ 1970 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1971 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1972 { 1973 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1974 PetscFunctionBegin; 1975 *array = a->a; 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1981 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1982 { 1983 PetscFunctionBegin; 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #include "petscblaslapack.h" 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1990 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1991 { 1992 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1993 PetscErrorCode ierr; 1994 PetscInt i,bs=y->bs,j,bs2; 1995 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1996 1997 PetscFunctionBegin; 1998 if (str == SAME_NONZERO_PATTERN) { 1999 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2000 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2001 if (y->xtoy && y->XtoY != X) { 2002 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2003 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2004 } 2005 if (!y->xtoy) { /* get xtoy */ 2006 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2007 y->XtoY = X; 2008 } 2009 bs2 = bs*bs; 2010 for (i=0; i<x->nz; i++) { 2011 j = 0; 2012 while (j < bs2){ 2013 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2014 j++; 2015 } 2016 } 2017 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 2018 } else { 2019 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2020 } 2021 PetscFunctionReturn(0); 2022 } 2023 2024 /* -------------------------------------------------------------------*/ 2025 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2026 MatGetRow_SeqBAIJ, 2027 MatRestoreRow_SeqBAIJ, 2028 MatMult_SeqBAIJ_N, 2029 /* 4*/ MatMultAdd_SeqBAIJ_N, 2030 MatMultTranspose_SeqBAIJ, 2031 MatMultTransposeAdd_SeqBAIJ, 2032 MatSolve_SeqBAIJ_N, 2033 0, 2034 0, 2035 /*10*/ 0, 2036 MatLUFactor_SeqBAIJ, 2037 0, 2038 0, 2039 MatTranspose_SeqBAIJ, 2040 /*15*/ MatGetInfo_SeqBAIJ, 2041 MatEqual_SeqBAIJ, 2042 MatGetDiagonal_SeqBAIJ, 2043 MatDiagonalScale_SeqBAIJ, 2044 MatNorm_SeqBAIJ, 2045 /*20*/ 0, 2046 MatAssemblyEnd_SeqBAIJ, 2047 0, 2048 MatSetOption_SeqBAIJ, 2049 MatZeroEntries_SeqBAIJ, 2050 /*25*/ MatZeroRows_SeqBAIJ, 2051 MatLUFactorSymbolic_SeqBAIJ, 2052 MatLUFactorNumeric_SeqBAIJ_N, 2053 0, 2054 0, 2055 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2056 MatILUFactorSymbolic_SeqBAIJ, 2057 0, 2058 MatGetArray_SeqBAIJ, 2059 MatRestoreArray_SeqBAIJ, 2060 /*35*/ MatDuplicate_SeqBAIJ, 2061 0, 2062 0, 2063 MatILUFactor_SeqBAIJ, 2064 0, 2065 /*40*/ MatAXPY_SeqBAIJ, 2066 MatGetSubMatrices_SeqBAIJ, 2067 MatIncreaseOverlap_SeqBAIJ, 2068 MatGetValues_SeqBAIJ, 2069 0, 2070 /*45*/ MatPrintHelp_SeqBAIJ, 2071 MatScale_SeqBAIJ, 2072 0, 2073 0, 2074 0, 2075 /*50*/ MatGetBlockSize_SeqBAIJ, 2076 MatGetRowIJ_SeqBAIJ, 2077 MatRestoreRowIJ_SeqBAIJ, 2078 0, 2079 0, 2080 /*55*/ 0, 2081 0, 2082 0, 2083 0, 2084 MatSetValuesBlocked_SeqBAIJ, 2085 /*60*/ MatGetSubMatrix_SeqBAIJ, 2086 MatDestroy_SeqBAIJ, 2087 MatView_SeqBAIJ, 2088 MatGetPetscMaps_Petsc, 2089 0, 2090 /*65*/ 0, 2091 0, 2092 0, 2093 0, 2094 0, 2095 /*70*/ MatGetRowMax_SeqBAIJ, 2096 MatConvert_Basic, 2097 0, 2098 0, 2099 0, 2100 /*75*/ 0, 2101 0, 2102 0, 2103 0, 2104 0, 2105 /*80*/ 0, 2106 0, 2107 0, 2108 0, 2109 MatLoad_SeqBAIJ, 2110 /*85*/ 0, 2111 0, 2112 0, 2113 0, 2114 0, 2115 /*90*/ 0, 2116 0, 2117 0, 2118 0, 2119 0, 2120 /*95*/ 0, 2121 0, 2122 0, 2123 0}; 2124 2125 EXTERN_C_BEGIN 2126 #undef __FUNCT__ 2127 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2128 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2129 { 2130 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2131 PetscInt nz = aij->i[mat->m]*aij->bs*aij->bs2; 2132 PetscErrorCode ierr; 2133 2134 PetscFunctionBegin; 2135 if (aij->nonew != 1) { 2136 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2137 } 2138 2139 /* allocate space for values if not already there */ 2140 if (!aij->saved_values) { 2141 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2142 } 2143 2144 /* copy values over */ 2145 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 EXTERN_C_END 2149 2150 EXTERN_C_BEGIN 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2153 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2154 { 2155 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2156 PetscErrorCode ierr; 2157 PetscInt nz = aij->i[mat->m]*aij->bs*aij->bs2; 2158 2159 PetscFunctionBegin; 2160 if (aij->nonew != 1) { 2161 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2162 } 2163 if (!aij->saved_values) { 2164 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2165 } 2166 2167 /* copy values over */ 2168 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 EXTERN_C_END 2172 2173 EXTERN_C_BEGIN 2174 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2175 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2176 EXTERN_C_END 2177 2178 EXTERN_C_BEGIN 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2181 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2182 { 2183 Mat_SeqBAIJ *b; 2184 PetscErrorCode ierr; 2185 PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2186 PetscTruth flg; 2187 2188 PetscFunctionBegin; 2189 2190 B->preallocated = PETSC_TRUE; 2191 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2192 if (nnz && newbs != bs) { 2193 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2194 } 2195 bs = newbs; 2196 2197 mbs = B->m/bs; 2198 nbs = B->n/bs; 2199 bs2 = bs*bs; 2200 2201 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2202 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2203 } 2204 2205 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2206 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2207 if (nnz) { 2208 for (i=0; i<mbs; i++) { 2209 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2210 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2211 } 2212 } 2213 2214 b = (Mat_SeqBAIJ*)B->data; 2215 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2216 B->ops->solve = MatSolve_SeqBAIJ_Update; 2217 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2218 if (!flg) { 2219 switch (bs) { 2220 case 1: 2221 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2222 B->ops->mult = MatMult_SeqBAIJ_1; 2223 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2224 break; 2225 case 2: 2226 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2227 B->ops->mult = MatMult_SeqBAIJ_2; 2228 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2229 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2230 break; 2231 case 3: 2232 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2233 B->ops->mult = MatMult_SeqBAIJ_3; 2234 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2235 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2236 break; 2237 case 4: 2238 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2239 B->ops->mult = MatMult_SeqBAIJ_4; 2240 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2241 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2242 break; 2243 case 5: 2244 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2245 B->ops->mult = MatMult_SeqBAIJ_5; 2246 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2247 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2248 break; 2249 case 6: 2250 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2251 B->ops->mult = MatMult_SeqBAIJ_6; 2252 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2253 break; 2254 case 7: 2255 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2256 B->ops->mult = MatMult_SeqBAIJ_7; 2257 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2258 break; 2259 default: 2260 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2261 B->ops->mult = MatMult_SeqBAIJ_N; 2262 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2263 break; 2264 } 2265 } 2266 b->bs = bs; 2267 b->mbs = mbs; 2268 b->nbs = nbs; 2269 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2270 if (!nnz) { 2271 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2272 else if (nz <= 0) nz = 1; 2273 for (i=0; i<mbs; i++) b->imax[i] = nz; 2274 nz = nz*mbs; 2275 } else { 2276 nz = 0; 2277 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2278 } 2279 2280 /* allocate the matrix space */ 2281 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2282 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2283 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2284 b->j = (PetscInt*)(b->a + nz*bs2); 2285 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2286 b->i = b->j + nz; 2287 b->singlemalloc = PETSC_TRUE; 2288 2289 b->i[0] = 0; 2290 for (i=1; i<mbs+1; i++) { 2291 b->i[i] = b->i[i-1] + b->imax[i-1]; 2292 } 2293 2294 /* b->ilen will count nonzeros in each block row so far. */ 2295 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2296 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2297 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2298 2299 b->bs = bs; 2300 b->bs2 = bs2; 2301 b->mbs = mbs; 2302 b->nz = 0; 2303 b->maxnz = nz*bs2; 2304 B->info.nz_unneeded = (PetscReal)b->maxnz; 2305 PetscFunctionReturn(0); 2306 } 2307 EXTERN_C_END 2308 2309 /*MC 2310 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2311 block sparse compressed row format. 2312 2313 Options Database Keys: 2314 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2315 2316 Level: beginner 2317 2318 .seealso: MatCreateSeqBAIJ 2319 M*/ 2320 2321 EXTERN_C_BEGIN 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatCreate_SeqBAIJ" 2324 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2325 { 2326 PetscErrorCode ierr; 2327 PetscMPIInt size; 2328 Mat_SeqBAIJ *b; 2329 2330 PetscFunctionBegin; 2331 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2332 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2333 2334 B->m = B->M = PetscMax(B->m,B->M); 2335 B->n = B->N = PetscMax(B->n,B->N); 2336 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2337 B->data = (void*)b; 2338 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2339 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2340 B->factor = 0; 2341 B->lupivotthreshold = 1.0; 2342 B->mapping = 0; 2343 b->row = 0; 2344 b->col = 0; 2345 b->icol = 0; 2346 b->reallocs = 0; 2347 b->saved_values = 0; 2348 #if defined(PETSC_USE_MAT_SINGLE) 2349 b->setvalueslen = 0; 2350 b->setvaluescopy = PETSC_NULL; 2351 #endif 2352 2353 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2354 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2355 2356 b->sorted = PETSC_FALSE; 2357 b->roworiented = PETSC_TRUE; 2358 b->nonew = 0; 2359 b->diag = 0; 2360 b->solve_work = 0; 2361 b->mult_work = 0; 2362 B->spptr = 0; 2363 B->info.nz_unneeded = (PetscReal)b->maxnz; 2364 b->keepzeroedrows = PETSC_FALSE; 2365 b->xtoy = 0; 2366 b->XtoY = 0; 2367 2368 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2369 "MatStoreValues_SeqBAIJ", 2370 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2371 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2372 "MatRetrieveValues_SeqBAIJ", 2373 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2374 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2375 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2376 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2377 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2378 "MatConvert_SeqBAIJ_SeqAIJ", 2379 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2380 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2381 "MatConvert_SeqBAIJ_SeqSBAIJ", 2382 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2383 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2384 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2385 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 EXTERN_C_END 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2392 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2393 { 2394 Mat C; 2395 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2396 PetscErrorCode ierr; 2397 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2398 2399 PetscFunctionBegin; 2400 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2401 2402 *B = 0; 2403 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2404 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2405 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2406 c = (Mat_SeqBAIJ*)C->data; 2407 2408 C->M = A->M; 2409 C->N = A->N; 2410 c->bs = a->bs; 2411 c->bs2 = a->bs2; 2412 c->mbs = a->mbs; 2413 c->nbs = a->nbs; 2414 2415 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2416 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 2417 for (i=0; i<mbs; i++) { 2418 c->imax[i] = a->imax[i]; 2419 c->ilen[i] = a->ilen[i]; 2420 } 2421 2422 /* allocate the matrix space */ 2423 c->singlemalloc = PETSC_TRUE; 2424 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2425 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2426 c->j = (int*)(c->a + nz*bs2); 2427 c->i = c->j + nz; 2428 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2429 if (mbs > 0) { 2430 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2431 if (cpvalues == MAT_COPY_VALUES) { 2432 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2433 } else { 2434 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2435 } 2436 } 2437 2438 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2439 c->sorted = a->sorted; 2440 c->roworiented = a->roworiented; 2441 c->nonew = a->nonew; 2442 2443 if (a->diag) { 2444 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2445 PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt)); 2446 for (i=0; i<mbs; i++) { 2447 c->diag[i] = a->diag[i]; 2448 } 2449 } else c->diag = 0; 2450 c->nz = a->nz; 2451 c->maxnz = a->maxnz; 2452 c->solve_work = 0; 2453 c->mult_work = 0; 2454 C->preallocated = PETSC_TRUE; 2455 C->assembled = PETSC_TRUE; 2456 *B = C; 2457 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "MatLoad_SeqBAIJ" 2463 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2464 { 2465 Mat_SeqBAIJ *a; 2466 Mat B; 2467 PetscErrorCode ierr; 2468 PetscInt i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 2469 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2470 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2471 PetscInt *masked,nmask,tmp,bs2,ishift; 2472 PetscScalar *aa; 2473 MPI_Comm comm = ((PetscObject)viewer)->comm; 2474 2475 PetscFunctionBegin; 2476 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2477 bs2 = bs*bs; 2478 2479 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2480 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2481 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2482 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2483 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2484 M = header[1]; N = header[2]; nz = header[3]; 2485 2486 if (header[3] < 0) { 2487 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2488 } 2489 2490 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2491 2492 /* 2493 This code adds extra rows to make sure the number of rows is 2494 divisible by the blocksize 2495 */ 2496 mbs = M/bs; 2497 extra_rows = bs - M + bs*(mbs); 2498 if (extra_rows == bs) extra_rows = 0; 2499 else mbs++; 2500 if (extra_rows) { 2501 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2502 } 2503 2504 /* read in row lengths */ 2505 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2506 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2507 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2508 2509 /* read in column indices */ 2510 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2511 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2512 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2513 2514 /* loop over row lengths determining block row lengths */ 2515 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2516 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2517 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2518 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2519 masked = mask + mbs; 2520 rowcount = 0; nzcount = 0; 2521 for (i=0; i<mbs; i++) { 2522 nmask = 0; 2523 for (j=0; j<bs; j++) { 2524 kmax = rowlengths[rowcount]; 2525 for (k=0; k<kmax; k++) { 2526 tmp = jj[nzcount++]/bs; 2527 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2528 } 2529 rowcount++; 2530 } 2531 browlengths[i] += nmask; 2532 /* zero out the mask elements we set */ 2533 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2534 } 2535 2536 /* create our matrix */ 2537 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2538 ierr = MatSetType(B,type);CHKERRQ(ierr); 2539 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2540 a = (Mat_SeqBAIJ*)B->data; 2541 2542 /* set matrix "i" values */ 2543 a->i[0] = 0; 2544 for (i=1; i<= mbs; i++) { 2545 a->i[i] = a->i[i-1] + browlengths[i-1]; 2546 a->ilen[i-1] = browlengths[i-1]; 2547 } 2548 a->nz = 0; 2549 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2550 2551 /* read in nonzero values */ 2552 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2553 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2554 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2555 2556 /* set "a" and "j" values into matrix */ 2557 nzcount = 0; jcount = 0; 2558 for (i=0; i<mbs; i++) { 2559 nzcountb = nzcount; 2560 nmask = 0; 2561 for (j=0; j<bs; j++) { 2562 kmax = rowlengths[i*bs+j]; 2563 for (k=0; k<kmax; k++) { 2564 tmp = jj[nzcount++]/bs; 2565 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2566 } 2567 } 2568 /* sort the masked values */ 2569 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2570 2571 /* set "j" values into matrix */ 2572 maskcount = 1; 2573 for (j=0; j<nmask; j++) { 2574 a->j[jcount++] = masked[j]; 2575 mask[masked[j]] = maskcount++; 2576 } 2577 /* set "a" values into matrix */ 2578 ishift = bs2*a->i[i]; 2579 for (j=0; j<bs; j++) { 2580 kmax = rowlengths[i*bs+j]; 2581 for (k=0; k<kmax; k++) { 2582 tmp = jj[nzcountb]/bs ; 2583 block = mask[tmp] - 1; 2584 point = jj[nzcountb] - bs*tmp; 2585 idx = ishift + bs2*block + j + bs*point; 2586 a->a[idx] = (MatScalar)aa[nzcountb++]; 2587 } 2588 } 2589 /* zero out the mask elements we set */ 2590 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2591 } 2592 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2593 2594 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2595 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2596 ierr = PetscFree(aa);CHKERRQ(ierr); 2597 ierr = PetscFree(jj);CHKERRQ(ierr); 2598 ierr = PetscFree(mask);CHKERRQ(ierr); 2599 2600 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2601 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2602 ierr = MatView_Private(B);CHKERRQ(ierr); 2603 2604 *A = B; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "MatCreateSeqBAIJ" 2610 /*@C 2611 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2612 compressed row) format. For good matrix assembly performance the 2613 user should preallocate the matrix storage by setting the parameter nz 2614 (or the array nnz). By setting these parameters accurately, performance 2615 during matrix assembly can be increased by more than a factor of 50. 2616 2617 Collective on MPI_Comm 2618 2619 Input Parameters: 2620 + comm - MPI communicator, set to PETSC_COMM_SELF 2621 . bs - size of block 2622 . m - number of rows 2623 . n - number of columns 2624 . nz - number of nonzero blocks per block row (same for all rows) 2625 - nnz - array containing the number of nonzero blocks in the various block rows 2626 (possibly different for each block row) or PETSC_NULL 2627 2628 Output Parameter: 2629 . A - the matrix 2630 2631 Options Database Keys: 2632 . -mat_no_unroll - uses code that does not unroll the loops in the 2633 block calculations (much slower) 2634 . -mat_block_size - size of the blocks to use 2635 2636 Level: intermediate 2637 2638 Notes: 2639 A nonzero block is any block that as 1 or more nonzeros in it 2640 2641 The block AIJ format is fully compatible with standard Fortran 77 2642 storage. That is, the stored row and column indices can begin at 2643 either one (as in Fortran) or zero. See the users' manual for details. 2644 2645 Specify the preallocated storage with either nz or nnz (not both). 2646 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2647 allocation. For additional details, see the users manual chapter on 2648 matrices. 2649 2650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2651 @*/ 2652 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2653 { 2654 PetscErrorCode ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2658 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2659 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2660 PetscFunctionReturn(0); 2661 } 2662 2663 #undef __FUNCT__ 2664 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2665 /*@C 2666 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2667 per row in the matrix. For good matrix assembly performance the 2668 user should preallocate the matrix storage by setting the parameter nz 2669 (or the array nnz). By setting these parameters accurately, performance 2670 during matrix assembly can be increased by more than a factor of 50. 2671 2672 Collective on MPI_Comm 2673 2674 Input Parameters: 2675 + A - the matrix 2676 . bs - size of block 2677 . nz - number of block nonzeros per block row (same for all rows) 2678 - nnz - array containing the number of block nonzeros in the various block rows 2679 (possibly different for each block row) or PETSC_NULL 2680 2681 Options Database Keys: 2682 . -mat_no_unroll - uses code that does not unroll the loops in the 2683 block calculations (much slower) 2684 . -mat_block_size - size of the blocks to use 2685 2686 Level: intermediate 2687 2688 Notes: 2689 The block AIJ format is fully compatible with standard Fortran 77 2690 storage. That is, the stored row and column indices can begin at 2691 either one (as in Fortran) or zero. See the users' manual for details. 2692 2693 Specify the preallocated storage with either nz or nnz (not both). 2694 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2695 allocation. For additional details, see the users manual chapter on 2696 matrices. 2697 2698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2699 @*/ 2700 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2701 { 2702 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2703 2704 PetscFunctionBegin; 2705 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2706 if (f) { 2707 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2708 } 2709 PetscFunctionReturn(0); 2710 } 2711 2712