1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = a->bs,mbs = a->mbs; 18 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 19 20 PetscFunctionBegin; 21 if (a->idiagvalid) PetscFunctionReturn(0); 22 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 23 diag_offset = a->diag; 24 if (!a->idiag) { 25 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 26 } 27 diag = a->idiag; 28 mdiag = a->idiag+bs*bs*mbs; 29 /* factor and invert each block */ 30 switch (a->bs){ 31 case 2: 32 for (i=0; i<mbs; i++) { 33 odiag = v + 4*diag_offset[i]; 34 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 35 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 36 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 37 diag += 4; 38 mdiag += 4; 39 } 40 break; 41 case 3: 42 for (i=0; i<mbs; i++) { 43 odiag = v + 9*diag_offset[i]; 44 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 45 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 46 diag[8] = odiag[8]; 47 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 48 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 49 mdiag[8] = odiag[8]; 50 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 51 diag += 9; 52 mdiag += 9; 53 } 54 break; 55 case 4: 56 for (i=0; i<mbs; i++) { 57 odiag = v + 16*diag_offset[i]; 58 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 59 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 61 diag += 16; 62 mdiag += 16; 63 } 64 break; 65 case 5: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 25*diag_offset[i]; 68 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 71 diag += 25; 72 mdiag += 25; 73 } 74 break; 75 default: 76 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",a->bs); 77 } 78 a->idiagvalid = PETSC_TRUE; 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 85 { 86 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 87 PetscScalar *x,x1,x2,s1,s2; 88 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 89 PetscErrorCode ierr; 90 PetscInt m = a->mbs,i,i2,nz,idx; 91 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 PetscErrorCode ierr; 192 PetscInt m = a->mbs,i,i2,nz,idx; 193 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 194 195 PetscFunctionBegin; 196 its = its*lits; 197 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 198 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 199 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 200 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 201 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 202 203 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 204 205 diag = a->diag; 206 idiag = a->idiag; 207 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209 210 if (flag & SOR_ZERO_INITIAL_GUESS) { 211 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 212 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 213 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 214 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 215 i2 = 3; 216 idiag += 9; 217 for (i=1; i<m; i++) { 218 v = aa + 9*ai[i]; 219 vi = aj + ai[i]; 220 nz = diag[i] - ai[i]; 221 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 222 while (nz--) { 223 idx = 3*(*vi++); 224 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 225 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 226 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 227 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 228 v += 9; 229 } 230 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 231 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 232 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 233 idiag += 9; 234 i2 += 3; 235 } 236 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 237 PetscLogFlops(9*(a->nz)); 238 } 239 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 240 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 241 i2 = 0; 242 mdiag = a->idiag+9*a->mbs; 243 for (i=0; i<m; i++) { 244 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 245 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 246 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 247 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 248 mdiag += 9; 249 i2 += 3; 250 } 251 PetscLogFlops(15*m); 252 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 253 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 256 idiag = a->idiag+9*a->mbs - 9; 257 i2 = 3*m - 3; 258 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 259 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 260 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 261 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 262 idiag -= 9; 263 i2 -= 3; 264 for (i=m-2; i>=0; i--) { 265 v = aa + 9*(diag[i]+1); 266 vi = aj + diag[i] + 1; 267 nz = ai[i+1] - diag[i] - 1; 268 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 269 while (nz--) { 270 idx = 3*(*vi++); 271 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 272 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 273 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 274 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 275 v += 9; 276 } 277 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 278 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 279 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 280 idiag -= 9; 281 i2 -= 3; 282 } 283 PetscLogFlops(9*(a->nz)); 284 } 285 } else { 286 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 287 } 288 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 289 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 296 { 297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 298 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 299 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 300 PetscErrorCode ierr; 301 PetscInt m = a->mbs,i,i2,nz,idx; 302 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 303 304 PetscFunctionBegin; 305 its = its*lits; 306 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 307 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 308 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 309 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 310 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 311 312 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 313 314 diag = a->diag; 315 idiag = a->idiag; 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 318 319 if (flag & SOR_ZERO_INITIAL_GUESS) { 320 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 321 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 322 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 323 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 324 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 325 i2 = 4; 326 idiag += 16; 327 for (i=1; i<m; i++) { 328 v = aa + 16*ai[i]; 329 vi = aj + ai[i]; 330 nz = diag[i] - ai[i]; 331 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 332 while (nz--) { 333 idx = 4*(*vi++); 334 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 335 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 336 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 337 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 338 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 339 v += 16; 340 } 341 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 342 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 343 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 344 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 345 idiag += 16; 346 i2 += 4; 347 } 348 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 349 PetscLogFlops(16*(a->nz)); 350 } 351 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 352 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 353 i2 = 0; 354 mdiag = a->idiag+16*a->mbs; 355 for (i=0; i<m; i++) { 356 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 357 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 358 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 359 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 360 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 361 mdiag += 16; 362 i2 += 4; 363 } 364 PetscLogFlops(28*m); 365 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 366 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 367 } 368 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 369 idiag = a->idiag+16*a->mbs - 16; 370 i2 = 4*m - 4; 371 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 372 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 373 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 374 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 375 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 376 idiag -= 16; 377 i2 -= 4; 378 for (i=m-2; i>=0; i--) { 379 v = aa + 16*(diag[i]+1); 380 vi = aj + diag[i] + 1; 381 nz = ai[i+1] - diag[i] - 1; 382 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 383 while (nz--) { 384 idx = 4*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 386 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 387 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 388 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 389 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 390 v += 16; 391 } 392 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 393 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 394 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 395 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 396 idiag -= 16; 397 i2 -= 4; 398 } 399 PetscLogFlops(16*(a->nz)); 400 } 401 } else { 402 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 403 } 404 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 412 { 413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 414 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 415 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 416 PetscErrorCode ierr; 417 PetscInt m = a->mbs,i,i2,nz,idx; 418 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 419 420 PetscFunctionBegin; 421 its = its*lits; 422 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 423 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 424 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 425 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 426 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 427 428 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 429 430 diag = a->diag; 431 idiag = a->idiag; 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 434 435 if (flag & SOR_ZERO_INITIAL_GUESS) { 436 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 437 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 438 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 439 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 440 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 441 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 442 i2 = 5; 443 idiag += 25; 444 for (i=1; i<m; i++) { 445 v = aa + 25*ai[i]; 446 vi = aj + ai[i]; 447 nz = diag[i] - ai[i]; 448 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 449 while (nz--) { 450 idx = 5*(*vi++); 451 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 452 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 453 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 454 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 455 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 456 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 457 v += 25; 458 } 459 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 460 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 461 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 462 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 463 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 464 idiag += 25; 465 i2 += 5; 466 } 467 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 468 PetscLogFlops(25*(a->nz)); 469 } 470 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 471 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 472 i2 = 0; 473 mdiag = a->idiag+25*a->mbs; 474 for (i=0; i<m; i++) { 475 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 476 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 477 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 478 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 479 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 480 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 481 mdiag += 25; 482 i2 += 5; 483 } 484 PetscLogFlops(45*m); 485 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 486 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 487 } 488 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 489 idiag = a->idiag+25*a->mbs - 25; 490 i2 = 5*m - 5; 491 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 492 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 493 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 494 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 495 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 496 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 497 idiag -= 25; 498 i2 -= 5; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 25*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 504 while (nz--) { 505 idx = 5*(*vi++); 506 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 507 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 508 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 509 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 510 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 511 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 512 v += 25; 513 } 514 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 515 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 516 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 517 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 518 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 519 idiag -= 25; 520 i2 -= 5; 521 } 522 PetscLogFlops(25*(a->nz)); 523 } 524 } else { 525 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 526 } 527 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 528 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 529 PetscFunctionReturn(0); 530 } 531 532 /* 533 Special version for Fun3d sequential benchmark 534 */ 535 #if defined(PETSC_HAVE_FORTRAN_CAPS) 536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 538 #define matsetvaluesblocked4_ matsetvaluesblocked4 539 #endif 540 541 EXTERN_C_BEGIN 542 #undef __FUNCT__ 543 #define __FUNCT__ "matsetvaluesblocked4_" 544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 545 { 546 Mat A = *AA; 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 549 PetscInt *ai=a->i,*ailen=a->ilen; 550 PetscInt *aj=a->j,stepval; 551 const PetscScalar *value = v; 552 MatScalar *ap,*aa = a->a,*bap; 553 554 PetscFunctionBegin; 555 stepval = (n-1)*4; 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 rp = aj + ai[row]; 559 ap = aa + 16*ai[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 col = in[l]; 564 value = v + k*(stepval+4)*4 + l*4; 565 low = 0; high = nrow; 566 while (high-low > 7) { 567 t = (low+high)/2; 568 if (rp[t] > col) high = t; 569 else low = t; 570 } 571 for (i=low; i<high; i++) { 572 if (rp[i] > col) break; 573 if (rp[i] == col) { 574 bap = ap + 16*i; 575 for (ii=0; ii<4; ii++,value+=stepval) { 576 for (jj=ii; jj<16; jj+=4) { 577 bap[jj] += *value++; 578 } 579 } 580 goto noinsert2; 581 } 582 } 583 N = nrow++ - 1; 584 /* shift up all the later entries in this row */ 585 for (ii=N; ii>=i; ii--) { 586 rp[ii+1] = rp[ii]; 587 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 588 } 589 if (N >= i) { 590 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 591 } 592 rp[i] = col; 593 bap = ap + 16*i; 594 for (ii=0; ii<4; ii++,value+=stepval) { 595 for (jj=ii; jj<16; jj+=4) { 596 bap[jj] = *value++; 597 } 598 } 599 noinsert2:; 600 low = i; 601 } 602 ailen[row] = nrow; 603 } 604 } 605 EXTERN_C_END 606 607 #if defined(PETSC_HAVE_FORTRAN_CAPS) 608 #define matsetvalues4_ MATSETVALUES4 609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 610 #define matsetvalues4_ matsetvalues4 611 #endif 612 613 EXTERN_C_BEGIN 614 #undef __FUNCT__ 615 #define __FUNCT__ "MatSetValues4_" 616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 617 { 618 Mat A = *AA; 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 620 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 621 PetscInt *ai=a->i,*ailen=a->ilen; 622 PetscInt *aj=a->j,brow,bcol; 623 PetscInt ridx,cidx; 624 MatScalar *ap,value,*aa=a->a,*bap; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over added rows */ 628 row = im[k]; brow = row/4; 629 rp = aj + ai[brow]; 630 ap = aa + 16*ai[brow]; 631 nrow = ailen[brow]; 632 low = 0; 633 for (l=0; l<n; l++) { /* loop over added columns */ 634 col = in[l]; bcol = col/4; 635 ridx = row % 4; cidx = col % 4; 636 value = v[l + k*n]; 637 low = 0; high = nrow; 638 while (high-low > 7) { 639 t = (low+high)/2; 640 if (rp[t] > bcol) high = t; 641 else low = t; 642 } 643 for (i=low; i<high; i++) { 644 if (rp[i] > bcol) break; 645 if (rp[i] == bcol) { 646 bap = ap + 16*i + 4*cidx + ridx; 647 *bap += value; 648 goto noinsert1; 649 } 650 } 651 N = nrow++ - 1; 652 /* shift up all the later entries in this row */ 653 for (ii=N; ii>=i; ii--) { 654 rp[ii+1] = rp[ii]; 655 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 656 } 657 if (N>=i) { 658 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 659 } 660 rp[i] = bcol; 661 ap[16*i + 4*cidx + ridx] = value; 662 noinsert1:; 663 low = i; 664 } 665 ailen[brow] = nrow; 666 } 667 } 668 EXTERN_C_END 669 670 /* UGLY, ugly, ugly 671 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 672 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 673 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 674 converts the entries into single precision and then calls ..._MatScalar() to put them 675 into the single precision data structures. 676 */ 677 #if defined(PETSC_USE_MAT_SINGLE) 678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 679 #else 680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 681 #endif 682 683 #define CHUNKSIZE 10 684 685 /* 686 Checks for missing diagonals 687 */ 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt *diag,*jj = a->j,i; 695 696 PetscFunctionBegin; 697 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 698 diag = a->diag; 699 for (i=0; i<a->mbs; i++) { 700 if (jj[diag[i]] != i) { 701 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 702 } 703 } 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 710 { 711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscInt i,j,*diag,m = a->mbs; 714 715 PetscFunctionBegin; 716 if (a->diag) PetscFunctionReturn(0); 717 718 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 719 PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt)); 720 for (i=0; i<m; i++) { 721 diag[i] = a->i[i+1]; 722 for (j=a->i[i]; j<a->i[i+1]; j++) { 723 if (a->j[j] == i) { 724 diag[i] = j; 725 break; 726 } 727 } 728 } 729 a->diag = diag; 730 PetscFunctionReturn(0); 731 } 732 733 734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 739 { 740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 741 PetscErrorCode ierr; 742 PetscInt n = a->mbs,i; 743 744 PetscFunctionBegin; 745 *nn = n; 746 if (!ia) PetscFunctionReturn(0); 747 if (symmetric) { 748 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 749 } else if (oshift == 1) { 750 /* temporarily add 1 to i and j indices */ 751 PetscInt nz = a->i[n]; 752 for (i=0; i<nz; i++) a->j[i]++; 753 for (i=0; i<n+1; i++) a->i[i]++; 754 *ia = a->i; *ja = a->j; 755 } else { 756 *ia = a->i; *ja = a->j; 757 } 758 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 765 { 766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 767 PetscErrorCode ierr; 768 PetscInt i,n = a->mbs; 769 770 PetscFunctionBegin; 771 if (!ia) PetscFunctionReturn(0); 772 if (symmetric) { 773 ierr = PetscFree(*ia);CHKERRQ(ierr); 774 ierr = PetscFree(*ja);CHKERRQ(ierr); 775 } else if (oshift == 1) { 776 PetscInt nz = a->i[n]-1; 777 for (i=0; i<nz; i++) a->j[i]--; 778 for (i=0; i<n+1; i++) a->i[i]--; 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 785 PetscErrorCode MatGetBlockSize_SeqBAIJ(Mat mat,PetscInt *bs) 786 { 787 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 788 789 PetscFunctionBegin; 790 *bs = baij->bs; 791 PetscFunctionReturn(0); 792 } 793 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatDestroy_SeqBAIJ" 797 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 798 { 799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 800 PetscErrorCode ierr; 801 802 PetscFunctionBegin; 803 #if defined(PETSC_USE_LOG) 804 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 805 #endif 806 ierr = PetscFree(a->a);CHKERRQ(ierr); 807 if (!a->singlemalloc) { 808 ierr = PetscFree(a->i);CHKERRQ(ierr); 809 ierr = PetscFree(a->j);CHKERRQ(ierr); 810 } 811 if (a->row) { 812 ierr = ISDestroy(a->row);CHKERRQ(ierr); 813 } 814 if (a->col) { 815 ierr = ISDestroy(a->col);CHKERRQ(ierr); 816 } 817 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 818 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 819 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 820 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 821 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 822 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 823 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 824 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 825 #if defined(PETSC_USE_MAT_SINGLE) 826 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 827 #endif 828 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 829 830 ierr = PetscFree(a);CHKERRQ(ierr); 831 832 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 833 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 834 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 835 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 836 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 837 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatSetOption_SeqBAIJ" 843 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 844 { 845 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 846 847 PetscFunctionBegin; 848 switch (op) { 849 case MAT_ROW_ORIENTED: 850 a->roworiented = PETSC_TRUE; 851 break; 852 case MAT_COLUMN_ORIENTED: 853 a->roworiented = PETSC_FALSE; 854 break; 855 case MAT_COLUMNS_SORTED: 856 a->sorted = PETSC_TRUE; 857 break; 858 case MAT_COLUMNS_UNSORTED: 859 a->sorted = PETSC_FALSE; 860 break; 861 case MAT_KEEP_ZEROED_ROWS: 862 a->keepzeroedrows = PETSC_TRUE; 863 break; 864 case MAT_NO_NEW_NONZERO_LOCATIONS: 865 a->nonew = 1; 866 break; 867 case MAT_NEW_NONZERO_LOCATION_ERR: 868 a->nonew = -1; 869 break; 870 case MAT_NEW_NONZERO_ALLOCATION_ERR: 871 a->nonew = -2; 872 break; 873 case MAT_YES_NEW_NONZERO_LOCATIONS: 874 a->nonew = 0; 875 break; 876 case MAT_ROWS_SORTED: 877 case MAT_ROWS_UNSORTED: 878 case MAT_YES_NEW_DIAGONALS: 879 case MAT_IGNORE_OFF_PROC_ENTRIES: 880 case MAT_USE_HASH_TABLE: 881 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 882 break; 883 case MAT_NO_NEW_DIAGONALS: 884 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 885 case MAT_SYMMETRIC: 886 case MAT_STRUCTURALLY_SYMMETRIC: 887 case MAT_NOT_SYMMETRIC: 888 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 889 case MAT_HERMITIAN: 890 case MAT_NOT_HERMITIAN: 891 case MAT_SYMMETRY_ETERNAL: 892 case MAT_NOT_SYMMETRY_ETERNAL: 893 break; 894 default: 895 SETERRQ(PETSC_ERR_SUP,"unknown option"); 896 } 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatGetRow_SeqBAIJ" 902 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 903 { 904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 907 MatScalar *aa,*aa_i; 908 PetscScalar *v_i; 909 910 PetscFunctionBegin; 911 bs = a->bs; 912 ai = a->i; 913 aj = a->j; 914 aa = a->a; 915 bs2 = a->bs2; 916 917 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 918 919 bn = row/bs; /* Block number */ 920 bp = row % bs; /* Block Position */ 921 M = ai[bn+1] - ai[bn]; 922 *nz = bs*M; 923 924 if (v) { 925 *v = 0; 926 if (*nz) { 927 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 928 for (i=0; i<M; i++) { /* for each block in the block row */ 929 v_i = *v + i*bs; 930 aa_i = aa + bs2*(ai[bn] + i); 931 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 932 } 933 } 934 } 935 936 if (idx) { 937 *idx = 0; 938 if (*nz) { 939 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 940 for (i=0; i<M; i++) { /* for each block in the block row */ 941 idx_i = *idx + i*bs; 942 itmp = bs*aj[ai[bn] + i]; 943 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 944 } 945 } 946 } 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 952 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 953 { 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 958 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatTranspose_SeqBAIJ" 964 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 965 { 966 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 967 Mat C; 968 PetscErrorCode ierr; 969 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 970 PetscInt *rows,*cols,bs2=a->bs2; 971 PetscScalar *array; 972 973 PetscFunctionBegin; 974 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 975 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 976 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 977 978 #if defined(PETSC_USE_MAT_SINGLE) 979 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 980 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 981 #else 982 array = a->a; 983 #endif 984 985 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 986 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 987 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 988 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 989 ierr = PetscFree(col);CHKERRQ(ierr); 990 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 991 cols = rows + bs; 992 for (i=0; i<mbs; i++) { 993 cols[0] = i*bs; 994 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 995 len = ai[i+1] - ai[i]; 996 for (j=0; j<len; j++) { 997 rows[0] = (*aj++)*bs; 998 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 999 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1000 array += bs2; 1001 } 1002 } 1003 ierr = PetscFree(rows);CHKERRQ(ierr); 1004 #if defined(PETSC_USE_MAT_SINGLE) 1005 ierr = PetscFree(array); 1006 #endif 1007 1008 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1009 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1010 1011 if (B) { 1012 *B = C; 1013 } else { 1014 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1015 } 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1021 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1022 { 1023 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1024 PetscErrorCode ierr; 1025 PetscInt i,*col_lens,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1026 int fd; 1027 PetscScalar *aa; 1028 FILE *file; 1029 1030 PetscFunctionBegin; 1031 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1032 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1033 col_lens[0] = MAT_FILE_COOKIE; 1034 1035 col_lens[1] = A->m; 1036 col_lens[2] = A->n; 1037 col_lens[3] = a->nz*bs2; 1038 1039 /* store lengths of each row and write (including header) to file */ 1040 count = 0; 1041 for (i=0; i<a->mbs; i++) { 1042 for (j=0; j<bs; j++) { 1043 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1044 } 1045 } 1046 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1047 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1048 1049 /* store column indices (zero start index) */ 1050 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1051 count = 0; 1052 for (i=0; i<a->mbs; i++) { 1053 for (j=0; j<bs; j++) { 1054 for (k=a->i[i]; k<a->i[i+1]; k++) { 1055 for (l=0; l<bs; l++) { 1056 jj[count++] = bs*a->j[k] + l; 1057 } 1058 } 1059 } 1060 } 1061 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1062 ierr = PetscFree(jj);CHKERRQ(ierr); 1063 1064 /* store nonzero values */ 1065 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1066 count = 0; 1067 for (i=0; i<a->mbs; i++) { 1068 for (j=0; j<bs; j++) { 1069 for (k=a->i[i]; k<a->i[i+1]; k++) { 1070 for (l=0; l<bs; l++) { 1071 aa[count++] = a->a[bs2*k + l*bs + j]; 1072 } 1073 } 1074 } 1075 } 1076 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1077 ierr = PetscFree(aa);CHKERRQ(ierr); 1078 1079 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1080 if (file) { 1081 fprintf(file,"-matload_block_size %d\n",(int)a->bs); 1082 } 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1088 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1089 { 1090 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscInt i,j,bs = a->bs,k,l,bs2=a->bs2; 1093 PetscViewerFormat format; 1094 1095 PetscFunctionBegin; 1096 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1097 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1098 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1099 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1100 Mat aij; 1101 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1102 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1103 ierr = MatDestroy(aij);CHKERRQ(ierr); 1104 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1105 PetscFunctionReturn(0); 1106 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1107 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1108 for (i=0; i<a->mbs; i++) { 1109 for (j=0; j<bs; j++) { 1110 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1111 for (k=a->i[i]; k<a->i[i+1]; k++) { 1112 for (l=0; l<bs; l++) { 1113 #if defined(PETSC_USE_COMPLEX) 1114 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1115 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1116 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1117 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1118 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1119 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1120 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1121 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1122 } 1123 #else 1124 if (a->a[bs2*k + l*bs + j] != 0.0) { 1125 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1126 } 1127 #endif 1128 } 1129 } 1130 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1131 } 1132 } 1133 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1134 } else { 1135 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1136 for (i=0; i<a->mbs; i++) { 1137 for (j=0; j<bs; j++) { 1138 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1139 for (k=a->i[i]; k<a->i[i+1]; k++) { 1140 for (l=0; l<bs; l++) { 1141 #if defined(PETSC_USE_COMPLEX) 1142 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1143 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1144 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1145 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1146 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1147 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1148 } else { 1149 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1150 } 1151 #else 1152 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1153 #endif 1154 } 1155 } 1156 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1157 } 1158 } 1159 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1160 } 1161 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1167 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1168 { 1169 Mat A = (Mat) Aa; 1170 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1171 PetscErrorCode ierr; 1172 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1173 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1174 MatScalar *aa; 1175 PetscViewer viewer; 1176 1177 PetscFunctionBegin; 1178 1179 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1180 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1181 1182 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1183 1184 /* loop over matrix elements drawing boxes */ 1185 color = PETSC_DRAW_BLUE; 1186 for (i=0,row=0; i<mbs; i++,row+=bs) { 1187 for (j=a->i[i]; j<a->i[i+1]; j++) { 1188 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1189 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1190 aa = a->a + j*bs2; 1191 for (k=0; k<bs; k++) { 1192 for (l=0; l<bs; l++) { 1193 if (PetscRealPart(*aa++) >= 0.) continue; 1194 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1195 } 1196 } 1197 } 1198 } 1199 color = PETSC_DRAW_CYAN; 1200 for (i=0,row=0; i<mbs; i++,row+=bs) { 1201 for (j=a->i[i]; j<a->i[i+1]; j++) { 1202 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1203 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1204 aa = a->a + j*bs2; 1205 for (k=0; k<bs; k++) { 1206 for (l=0; l<bs; l++) { 1207 if (PetscRealPart(*aa++) != 0.) continue; 1208 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1209 } 1210 } 1211 } 1212 } 1213 1214 color = PETSC_DRAW_RED; 1215 for (i=0,row=0; i<mbs; i++,row+=bs) { 1216 for (j=a->i[i]; j<a->i[i+1]; j++) { 1217 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1218 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1219 aa = a->a + j*bs2; 1220 for (k=0; k<bs; k++) { 1221 for (l=0; l<bs; l++) { 1222 if (PetscRealPart(*aa++) <= 0.) continue; 1223 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1224 } 1225 } 1226 } 1227 } 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1233 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1234 { 1235 PetscErrorCode ierr; 1236 PetscReal xl,yl,xr,yr,w,h; 1237 PetscDraw draw; 1238 PetscTruth isnull; 1239 1240 PetscFunctionBegin; 1241 1242 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1243 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1244 1245 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1246 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1247 xr += w; yr += h; xl = -w; yl = -h; 1248 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1249 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1250 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatView_SeqBAIJ" 1256 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1257 { 1258 PetscErrorCode ierr; 1259 PetscTruth iascii,isbinary,isdraw; 1260 1261 PetscFunctionBegin; 1262 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1263 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1264 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1265 if (iascii){ 1266 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1267 } else if (isbinary) { 1268 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1269 } else if (isdraw) { 1270 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1271 } else { 1272 Mat B; 1273 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1274 ierr = MatView(B,viewer);CHKERRQ(ierr); 1275 ierr = MatDestroy(B);CHKERRQ(ierr); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1283 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1284 { 1285 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1286 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1287 PetscInt *ai = a->i,*ailen = a->ilen; 1288 PetscInt brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1289 MatScalar *ap,*aa = a->a,zero = 0.0; 1290 1291 PetscFunctionBegin; 1292 for (k=0; k<m; k++) { /* loop over rows */ 1293 row = im[k]; brow = row/bs; 1294 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1295 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1296 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1297 nrow = ailen[brow]; 1298 for (l=0; l<n; l++) { /* loop over columns */ 1299 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1300 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1301 col = in[l] ; 1302 bcol = col/bs; 1303 cidx = col%bs; 1304 ridx = row%bs; 1305 high = nrow; 1306 low = 0; /* assume unsorted */ 1307 while (high-low > 5) { 1308 t = (low+high)/2; 1309 if (rp[t] > bcol) high = t; 1310 else low = t; 1311 } 1312 for (i=low; i<high; i++) { 1313 if (rp[i] > bcol) break; 1314 if (rp[i] == bcol) { 1315 *v++ = ap[bs2*i+bs*cidx+ridx]; 1316 goto finished; 1317 } 1318 } 1319 *v++ = zero; 1320 finished:; 1321 } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #if defined(PETSC_USE_MAT_SINGLE) 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1329 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1330 { 1331 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1332 PetscErrorCode ierr; 1333 PetscInt i,N = m*n*b->bs2; 1334 MatScalar *vsingle; 1335 1336 PetscFunctionBegin; 1337 if (N > b->setvalueslen) { 1338 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1339 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1340 b->setvalueslen = N; 1341 } 1342 vsingle = b->setvaluescopy; 1343 for (i=0; i<N; i++) { 1344 vsingle[i] = v[i]; 1345 } 1346 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 #endif 1350 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1354 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1355 { 1356 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1357 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1358 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1359 PetscErrorCode ierr; 1360 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 1361 PetscTruth roworiented=a->roworiented; 1362 const MatScalar *value = v; 1363 MatScalar *ap,*aa = a->a,*bap; 1364 1365 PetscFunctionBegin; 1366 if (roworiented) { 1367 stepval = (n-1)*bs; 1368 } else { 1369 stepval = (m-1)*bs; 1370 } 1371 for (k=0; k<m; k++) { /* loop over added rows */ 1372 row = im[k]; 1373 if (row < 0) continue; 1374 #if defined(PETSC_USE_BOPT_g) 1375 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1376 #endif 1377 rp = aj + ai[row]; 1378 ap = aa + bs2*ai[row]; 1379 rmax = imax[row]; 1380 nrow = ailen[row]; 1381 low = 0; 1382 for (l=0; l<n; l++) { /* loop over added columns */ 1383 if (in[l] < 0) continue; 1384 #if defined(PETSC_USE_BOPT_g) 1385 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1386 #endif 1387 col = in[l]; 1388 if (roworiented) { 1389 value = v + k*(stepval+bs)*bs + l*bs; 1390 } else { 1391 value = v + l*(stepval+bs)*bs + k*bs; 1392 } 1393 if (!sorted) low = 0; high = nrow; 1394 while (high-low > 7) { 1395 t = (low+high)/2; 1396 if (rp[t] > col) high = t; 1397 else low = t; 1398 } 1399 for (i=low; i<high; i++) { 1400 if (rp[i] > col) break; 1401 if (rp[i] == col) { 1402 bap = ap + bs2*i; 1403 if (roworiented) { 1404 if (is == ADD_VALUES) { 1405 for (ii=0; ii<bs; ii++,value+=stepval) { 1406 for (jj=ii; jj<bs2; jj+=bs) { 1407 bap[jj] += *value++; 1408 } 1409 } 1410 } else { 1411 for (ii=0; ii<bs; ii++,value+=stepval) { 1412 for (jj=ii; jj<bs2; jj+=bs) { 1413 bap[jj] = *value++; 1414 } 1415 } 1416 } 1417 } else { 1418 if (is == ADD_VALUES) { 1419 for (ii=0; ii<bs; ii++,value+=stepval) { 1420 for (jj=0; jj<bs; jj++) { 1421 *bap++ += *value++; 1422 } 1423 } 1424 } else { 1425 for (ii=0; ii<bs; ii++,value+=stepval) { 1426 for (jj=0; jj<bs; jj++) { 1427 *bap++ = *value++; 1428 } 1429 } 1430 } 1431 } 1432 goto noinsert2; 1433 } 1434 } 1435 if (nonew == 1) goto noinsert2; 1436 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1437 if (nrow >= rmax) { 1438 /* there is no extra room in row, therefore enlarge */ 1439 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1440 MatScalar *new_a; 1441 1442 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1443 1444 /* malloc new storage space */ 1445 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1446 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1447 new_j = (PetscInt*)(new_a + bs2*new_nz); 1448 new_i = new_j + new_nz; 1449 1450 /* copy over old data into new slots */ 1451 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1452 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1453 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1454 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1455 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1456 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1457 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1458 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1459 /* free up old matrix storage */ 1460 ierr = PetscFree(a->a);CHKERRQ(ierr); 1461 if (!a->singlemalloc) { 1462 ierr = PetscFree(a->i);CHKERRQ(ierr); 1463 ierr = PetscFree(a->j);CHKERRQ(ierr); 1464 } 1465 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1466 a->singlemalloc = PETSC_TRUE; 1467 1468 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1469 rmax = imax[row] = imax[row] + CHUNKSIZE; 1470 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 1471 a->maxnz += bs2*CHUNKSIZE; 1472 a->reallocs++; 1473 a->nz++; 1474 } 1475 N = nrow++ - 1; 1476 /* shift up all the later entries in this row */ 1477 for (ii=N; ii>=i; ii--) { 1478 rp[ii+1] = rp[ii]; 1479 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1480 } 1481 if (N >= i) { 1482 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1483 } 1484 rp[i] = col; 1485 bap = ap + bs2*i; 1486 if (roworiented) { 1487 for (ii=0; ii<bs; ii++,value+=stepval) { 1488 for (jj=ii; jj<bs2; jj+=bs) { 1489 bap[jj] = *value++; 1490 } 1491 } 1492 } else { 1493 for (ii=0; ii<bs; ii++,value+=stepval) { 1494 for (jj=0; jj<bs; jj++) { 1495 *bap++ = *value++; 1496 } 1497 } 1498 } 1499 noinsert2:; 1500 low = i; 1501 } 1502 ailen[row] = nrow; 1503 } 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1509 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1510 { 1511 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1512 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1513 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1514 PetscErrorCode ierr; 1515 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1516 MatScalar *aa = a->a,*ap; 1517 1518 PetscFunctionBegin; 1519 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1520 1521 if (m) rmax = ailen[0]; 1522 for (i=1; i<mbs; i++) { 1523 /* move each row back by the amount of empty slots (fshift) before it*/ 1524 fshift += imax[i-1] - ailen[i-1]; 1525 rmax = PetscMax(rmax,ailen[i]); 1526 if (fshift) { 1527 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1528 N = ailen[i]; 1529 for (j=0; j<N; j++) { 1530 ip[j-fshift] = ip[j]; 1531 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1532 } 1533 } 1534 ai[i] = ai[i-1] + ailen[i-1]; 1535 } 1536 if (mbs) { 1537 fshift += imax[mbs-1] - ailen[mbs-1]; 1538 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1539 } 1540 /* reset ilen and imax for each row */ 1541 for (i=0; i<mbs; i++) { 1542 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1543 } 1544 a->nz = ai[mbs]; 1545 1546 /* diagonals may have moved, so kill the diagonal pointers */ 1547 a->idiagvalid = PETSC_FALSE; 1548 if (fshift && a->diag) { 1549 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1550 PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt)); 1551 a->diag = 0; 1552 } 1553 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1554 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs); 1555 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 1556 a->reallocs = 0; 1557 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1558 1559 PetscFunctionReturn(0); 1560 } 1561 1562 1563 1564 /* 1565 This function returns an array of flags which indicate the locations of contiguous 1566 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1567 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1568 Assume: sizes should be long enough to hold all the values. 1569 */ 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1572 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1573 { 1574 PetscInt i,j,k,row; 1575 PetscTruth flg; 1576 1577 PetscFunctionBegin; 1578 for (i=0,j=0; i<n; j++) { 1579 row = idx[i]; 1580 if (row%bs!=0) { /* Not the begining of a block */ 1581 sizes[j] = 1; 1582 i++; 1583 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1584 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1585 i++; 1586 } else { /* Begining of the block, so check if the complete block exists */ 1587 flg = PETSC_TRUE; 1588 for (k=1; k<bs; k++) { 1589 if (row+k != idx[i+k]) { /* break in the block */ 1590 flg = PETSC_FALSE; 1591 break; 1592 } 1593 } 1594 if (flg == PETSC_TRUE) { /* No break in the bs */ 1595 sizes[j] = bs; 1596 i+= bs; 1597 } else { 1598 sizes[j] = 1; 1599 i++; 1600 } 1601 } 1602 } 1603 *bs_max = j; 1604 PetscFunctionReturn(0); 1605 } 1606 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1609 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1610 { 1611 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1612 PetscErrorCode ierr; 1613 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1614 PetscInt bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1615 PetscScalar zero = 0.0; 1616 MatScalar *aa; 1617 1618 PetscFunctionBegin; 1619 /* Make a copy of the IS and sort it */ 1620 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1621 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1622 1623 /* allocate memory for rows,sizes */ 1624 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1625 sizes = rows + is_n; 1626 1627 /* copy IS values to rows, and sort them */ 1628 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1629 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1630 if (baij->keepzeroedrows) { 1631 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1632 bs_max = is_n; 1633 } else { 1634 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1635 } 1636 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1637 1638 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1639 row = rows[j]; 1640 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1641 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1642 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1643 if (sizes[i] == bs && !baij->keepzeroedrows) { 1644 if (diag) { 1645 if (baij->ilen[row/bs] > 0) { 1646 baij->ilen[row/bs] = 1; 1647 baij->j[baij->i[row/bs]] = row/bs; 1648 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1649 } 1650 /* Now insert all the diagonal values for this bs */ 1651 for (k=0; k<bs; k++) { 1652 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1653 } 1654 } else { /* (!diag) */ 1655 baij->ilen[row/bs] = 0; 1656 } /* end (!diag) */ 1657 } else { /* (sizes[i] != bs) */ 1658 #if defined (PETSC_USE_DEBUG) 1659 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1660 #endif 1661 for (k=0; k<count; k++) { 1662 aa[0] = zero; 1663 aa += bs; 1664 } 1665 if (diag) { 1666 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 1671 ierr = PetscFree(rows);CHKERRQ(ierr); 1672 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1678 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1679 { 1680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1681 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1682 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1683 PetscInt *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1684 PetscErrorCode ierr; 1685 PetscInt ridx,cidx,bs2=a->bs2; 1686 PetscTruth roworiented=a->roworiented; 1687 MatScalar *ap,value,*aa=a->a,*bap; 1688 1689 PetscFunctionBegin; 1690 for (k=0; k<m; k++) { /* loop over added rows */ 1691 row = im[k]; brow = row/bs; 1692 if (row < 0) continue; 1693 #if defined(PETSC_USE_BOPT_g) 1694 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1695 #endif 1696 rp = aj + ai[brow]; 1697 ap = aa + bs2*ai[brow]; 1698 rmax = imax[brow]; 1699 nrow = ailen[brow]; 1700 low = 0; 1701 for (l=0; l<n; l++) { /* loop over added columns */ 1702 if (in[l] < 0) continue; 1703 #if defined(PETSC_USE_BOPT_g) 1704 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1705 #endif 1706 col = in[l]; bcol = col/bs; 1707 ridx = row % bs; cidx = col % bs; 1708 if (roworiented) { 1709 value = v[l + k*n]; 1710 } else { 1711 value = v[k + l*m]; 1712 } 1713 if (!sorted) low = 0; high = nrow; 1714 while (high-low > 7) { 1715 t = (low+high)/2; 1716 if (rp[t] > bcol) high = t; 1717 else low = t; 1718 } 1719 for (i=low; i<high; i++) { 1720 if (rp[i] > bcol) break; 1721 if (rp[i] == bcol) { 1722 bap = ap + bs2*i + bs*cidx + ridx; 1723 if (is == ADD_VALUES) *bap += value; 1724 else *bap = value; 1725 goto noinsert1; 1726 } 1727 } 1728 if (nonew == 1) goto noinsert1; 1729 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1730 if (nrow >= rmax) { 1731 /* there is no extra room in row, therefore enlarge */ 1732 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1733 MatScalar *new_a; 1734 1735 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1736 1737 /* Malloc new storage space */ 1738 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1739 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1740 new_j = (PetscInt*)(new_a + bs2*new_nz); 1741 new_i = new_j + new_nz; 1742 1743 /* copy over old data into new slots */ 1744 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1745 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1746 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1747 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1748 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1749 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1750 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1751 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1752 /* free up old matrix storage */ 1753 ierr = PetscFree(a->a);CHKERRQ(ierr); 1754 if (!a->singlemalloc) { 1755 ierr = PetscFree(a->i);CHKERRQ(ierr); 1756 ierr = PetscFree(a->j);CHKERRQ(ierr); 1757 } 1758 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1759 a->singlemalloc = PETSC_TRUE; 1760 1761 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1762 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1763 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 1764 a->maxnz += bs2*CHUNKSIZE; 1765 a->reallocs++; 1766 a->nz++; 1767 } 1768 N = nrow++ - 1; 1769 /* shift up all the later entries in this row */ 1770 for (ii=N; ii>=i; ii--) { 1771 rp[ii+1] = rp[ii]; 1772 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1773 } 1774 if (N>=i) { 1775 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1776 } 1777 rp[i] = bcol; 1778 ap[bs2*i + bs*cidx + ridx] = value; 1779 noinsert1:; 1780 low = i; 1781 } 1782 ailen[brow] = nrow; 1783 } 1784 PetscFunctionReturn(0); 1785 } 1786 1787 1788 #undef __FUNCT__ 1789 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1790 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1791 { 1792 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1793 Mat outA; 1794 PetscErrorCode ierr; 1795 PetscTruth row_identity,col_identity; 1796 1797 PetscFunctionBegin; 1798 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1799 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1800 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1801 if (!row_identity || !col_identity) { 1802 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1803 } 1804 1805 outA = inA; 1806 inA->factor = FACTOR_LU; 1807 1808 if (!a->diag) { 1809 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1810 } 1811 1812 a->row = row; 1813 a->col = col; 1814 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1815 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1816 1817 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1818 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1819 PetscLogObjectParent(inA,a->icol); 1820 1821 /* 1822 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1823 for ILU(0) factorization with natural ordering 1824 */ 1825 if (a->bs < 8) { 1826 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1827 } else { 1828 if (!a->solve_work) { 1829 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1830 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1831 } 1832 } 1833 1834 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1835 1836 PetscFunctionReturn(0); 1837 } 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1840 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1841 { 1842 static PetscTruth called = PETSC_FALSE; 1843 MPI_Comm comm = A->comm; 1844 PetscErrorCode ierr; 1845 1846 PetscFunctionBegin; 1847 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1848 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1849 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 EXTERN_C_BEGIN 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1856 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1857 { 1858 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1859 PetscInt i,nz,nbs; 1860 1861 PetscFunctionBegin; 1862 nz = baij->maxnz/baij->bs2; 1863 nbs = baij->nbs; 1864 for (i=0; i<nz; i++) { 1865 baij->j[i] = indices[i]; 1866 } 1867 baij->nz = nz; 1868 for (i=0; i<nbs; i++) { 1869 baij->ilen[i] = baij->imax[i]; 1870 } 1871 1872 PetscFunctionReturn(0); 1873 } 1874 EXTERN_C_END 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1878 /*@ 1879 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1880 in the matrix. 1881 1882 Input Parameters: 1883 + mat - the SeqBAIJ matrix 1884 - indices - the column indices 1885 1886 Level: advanced 1887 1888 Notes: 1889 This can be called if you have precomputed the nonzero structure of the 1890 matrix and want to provide it to the matrix object to improve the performance 1891 of the MatSetValues() operation. 1892 1893 You MUST have set the correct numbers of nonzeros per row in the call to 1894 MatCreateSeqBAIJ(). 1895 1896 MUST be called before any calls to MatSetValues(); 1897 1898 @*/ 1899 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1900 { 1901 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1902 1903 PetscFunctionBegin; 1904 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1905 PetscValidPointer(indices,2); 1906 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1907 if (f) { 1908 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1909 } else { 1910 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1917 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1918 { 1919 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1920 PetscErrorCode ierr; 1921 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1922 PetscReal atmp; 1923 PetscScalar *x,zero = 0.0; 1924 MatScalar *aa; 1925 PetscInt ncols,brow,krow,kcol; 1926 1927 PetscFunctionBegin; 1928 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1929 bs = a->bs; 1930 aa = a->a; 1931 ai = a->i; 1932 aj = a->j; 1933 mbs = a->mbs; 1934 1935 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1936 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1937 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1938 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1939 for (i=0; i<mbs; i++) { 1940 ncols = ai[1] - ai[0]; ai++; 1941 brow = bs*i; 1942 for (j=0; j<ncols; j++){ 1943 /* bcol = bs*(*aj); */ 1944 for (kcol=0; kcol<bs; kcol++){ 1945 for (krow=0; krow<bs; krow++){ 1946 atmp = PetscAbsScalar(*aa); aa++; 1947 row = brow + krow; /* row index */ 1948 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1949 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1950 } 1951 } 1952 aj++; 1953 } 1954 } 1955 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1961 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1962 { 1963 PetscErrorCode ierr; 1964 1965 PetscFunctionBegin; 1966 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1972 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1973 { 1974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1975 PetscFunctionBegin; 1976 *array = a->a; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1982 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1983 { 1984 PetscFunctionBegin; 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #include "petscblaslapack.h" 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1991 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1992 { 1993 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1994 PetscErrorCode ierr; 1995 PetscInt i,bs=y->bs,j,bs2; 1996 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1997 1998 PetscFunctionBegin; 1999 if (str == SAME_NONZERO_PATTERN) { 2000 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2001 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2002 if (y->xtoy && y->XtoY != X) { 2003 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2004 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2005 } 2006 if (!y->xtoy) { /* get xtoy */ 2007 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2008 y->XtoY = X; 2009 } 2010 bs2 = bs*bs; 2011 for (i=0; i<x->nz; i++) { 2012 j = 0; 2013 while (j < bs2){ 2014 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2015 j++; 2016 } 2017 } 2018 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 2019 } else { 2020 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2021 } 2022 PetscFunctionReturn(0); 2023 } 2024 2025 /* -------------------------------------------------------------------*/ 2026 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2027 MatGetRow_SeqBAIJ, 2028 MatRestoreRow_SeqBAIJ, 2029 MatMult_SeqBAIJ_N, 2030 /* 4*/ MatMultAdd_SeqBAIJ_N, 2031 MatMultTranspose_SeqBAIJ, 2032 MatMultTransposeAdd_SeqBAIJ, 2033 MatSolve_SeqBAIJ_N, 2034 0, 2035 0, 2036 /*10*/ 0, 2037 MatLUFactor_SeqBAIJ, 2038 0, 2039 0, 2040 MatTranspose_SeqBAIJ, 2041 /*15*/ MatGetInfo_SeqBAIJ, 2042 MatEqual_SeqBAIJ, 2043 MatGetDiagonal_SeqBAIJ, 2044 MatDiagonalScale_SeqBAIJ, 2045 MatNorm_SeqBAIJ, 2046 /*20*/ 0, 2047 MatAssemblyEnd_SeqBAIJ, 2048 0, 2049 MatSetOption_SeqBAIJ, 2050 MatZeroEntries_SeqBAIJ, 2051 /*25*/ MatZeroRows_SeqBAIJ, 2052 MatLUFactorSymbolic_SeqBAIJ, 2053 MatLUFactorNumeric_SeqBAIJ_N, 2054 0, 2055 0, 2056 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2057 MatILUFactorSymbolic_SeqBAIJ, 2058 0, 2059 MatGetArray_SeqBAIJ, 2060 MatRestoreArray_SeqBAIJ, 2061 /*35*/ MatDuplicate_SeqBAIJ, 2062 0, 2063 0, 2064 MatILUFactor_SeqBAIJ, 2065 0, 2066 /*40*/ MatAXPY_SeqBAIJ, 2067 MatGetSubMatrices_SeqBAIJ, 2068 MatIncreaseOverlap_SeqBAIJ, 2069 MatGetValues_SeqBAIJ, 2070 0, 2071 /*45*/ MatPrintHelp_SeqBAIJ, 2072 MatScale_SeqBAIJ, 2073 0, 2074 0, 2075 0, 2076 /*50*/ MatGetBlockSize_SeqBAIJ, 2077 MatGetRowIJ_SeqBAIJ, 2078 MatRestoreRowIJ_SeqBAIJ, 2079 0, 2080 0, 2081 /*55*/ 0, 2082 0, 2083 0, 2084 0, 2085 MatSetValuesBlocked_SeqBAIJ, 2086 /*60*/ MatGetSubMatrix_SeqBAIJ, 2087 MatDestroy_SeqBAIJ, 2088 MatView_SeqBAIJ, 2089 MatGetPetscMaps_Petsc, 2090 0, 2091 /*65*/ 0, 2092 0, 2093 0, 2094 0, 2095 0, 2096 /*70*/ MatGetRowMax_SeqBAIJ, 2097 MatConvert_Basic, 2098 0, 2099 0, 2100 0, 2101 /*75*/ 0, 2102 0, 2103 0, 2104 0, 2105 0, 2106 /*80*/ 0, 2107 0, 2108 0, 2109 0, 2110 MatLoad_SeqBAIJ, 2111 /*85*/ 0, 2112 0, 2113 0, 2114 0, 2115 0, 2116 /*90*/ 0, 2117 0, 2118 0, 2119 0, 2120 0, 2121 /*95*/ 0, 2122 0, 2123 0, 2124 0}; 2125 2126 EXTERN_C_BEGIN 2127 #undef __FUNCT__ 2128 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2129 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2130 { 2131 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2132 PetscInt nz = aij->i[mat->m]*aij->bs*aij->bs2; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 if (aij->nonew != 1) { 2137 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2138 } 2139 2140 /* allocate space for values if not already there */ 2141 if (!aij->saved_values) { 2142 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2143 } 2144 2145 /* copy values over */ 2146 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 EXTERN_C_END 2150 2151 EXTERN_C_BEGIN 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2154 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2155 { 2156 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2157 PetscErrorCode ierr; 2158 PetscInt nz = aij->i[mat->m]*aij->bs*aij->bs2; 2159 2160 PetscFunctionBegin; 2161 if (aij->nonew != 1) { 2162 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2163 } 2164 if (!aij->saved_values) { 2165 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2166 } 2167 2168 /* copy values over */ 2169 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2170 PetscFunctionReturn(0); 2171 } 2172 EXTERN_C_END 2173 2174 EXTERN_C_BEGIN 2175 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2176 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2177 EXTERN_C_END 2178 2179 EXTERN_C_BEGIN 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2182 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2183 { 2184 Mat_SeqBAIJ *b; 2185 PetscErrorCode ierr; 2186 PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2187 PetscTruth flg; 2188 2189 PetscFunctionBegin; 2190 2191 B->preallocated = PETSC_TRUE; 2192 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2193 if (nnz && newbs != bs) { 2194 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2195 } 2196 bs = newbs; 2197 2198 mbs = B->m/bs; 2199 nbs = B->n/bs; 2200 bs2 = bs*bs; 2201 2202 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2203 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2204 } 2205 2206 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2207 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2208 if (nnz) { 2209 for (i=0; i<mbs; i++) { 2210 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2211 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2212 } 2213 } 2214 2215 b = (Mat_SeqBAIJ*)B->data; 2216 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2217 B->ops->solve = MatSolve_SeqBAIJ_Update; 2218 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2219 if (!flg) { 2220 switch (bs) { 2221 case 1: 2222 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2223 B->ops->mult = MatMult_SeqBAIJ_1; 2224 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2225 break; 2226 case 2: 2227 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2228 B->ops->mult = MatMult_SeqBAIJ_2; 2229 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2230 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2231 break; 2232 case 3: 2233 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2234 B->ops->mult = MatMult_SeqBAIJ_3; 2235 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2236 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2237 break; 2238 case 4: 2239 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2240 B->ops->mult = MatMult_SeqBAIJ_4; 2241 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2242 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2243 break; 2244 case 5: 2245 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2246 B->ops->mult = MatMult_SeqBAIJ_5; 2247 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2248 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2249 break; 2250 case 6: 2251 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2252 B->ops->mult = MatMult_SeqBAIJ_6; 2253 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2254 break; 2255 case 7: 2256 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2257 B->ops->mult = MatMult_SeqBAIJ_7; 2258 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2259 break; 2260 default: 2261 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2262 B->ops->mult = MatMult_SeqBAIJ_N; 2263 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2264 break; 2265 } 2266 } 2267 b->bs = bs; 2268 b->mbs = mbs; 2269 b->nbs = nbs; 2270 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2271 if (!nnz) { 2272 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2273 else if (nz <= 0) nz = 1; 2274 for (i=0; i<mbs; i++) b->imax[i] = nz; 2275 nz = nz*mbs; 2276 } else { 2277 nz = 0; 2278 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2279 } 2280 2281 /* allocate the matrix space */ 2282 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2283 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2284 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2285 b->j = (PetscInt*)(b->a + nz*bs2); 2286 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2287 b->i = b->j + nz; 2288 b->singlemalloc = PETSC_TRUE; 2289 2290 b->i[0] = 0; 2291 for (i=1; i<mbs+1; i++) { 2292 b->i[i] = b->i[i-1] + b->imax[i-1]; 2293 } 2294 2295 /* b->ilen will count nonzeros in each block row so far. */ 2296 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2297 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2298 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2299 2300 b->bs = bs; 2301 b->bs2 = bs2; 2302 b->mbs = mbs; 2303 b->nz = 0; 2304 b->maxnz = nz*bs2; 2305 B->info.nz_unneeded = (PetscReal)b->maxnz; 2306 PetscFunctionReturn(0); 2307 } 2308 EXTERN_C_END 2309 2310 /*MC 2311 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2312 block sparse compressed row format. 2313 2314 Options Database Keys: 2315 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2316 2317 Level: beginner 2318 2319 .seealso: MatCreateSeqBAIJ 2320 M*/ 2321 2322 EXTERN_C_BEGIN 2323 #undef __FUNCT__ 2324 #define __FUNCT__ "MatCreate_SeqBAIJ" 2325 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2326 { 2327 PetscErrorCode ierr; 2328 PetscMPIInt size; 2329 Mat_SeqBAIJ *b; 2330 2331 PetscFunctionBegin; 2332 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2333 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2334 2335 B->m = B->M = PetscMax(B->m,B->M); 2336 B->n = B->N = PetscMax(B->n,B->N); 2337 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2338 B->data = (void*)b; 2339 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2340 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2341 B->factor = 0; 2342 B->lupivotthreshold = 1.0; 2343 B->mapping = 0; 2344 b->row = 0; 2345 b->col = 0; 2346 b->icol = 0; 2347 b->reallocs = 0; 2348 b->saved_values = 0; 2349 #if defined(PETSC_USE_MAT_SINGLE) 2350 b->setvalueslen = 0; 2351 b->setvaluescopy = PETSC_NULL; 2352 #endif 2353 2354 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2355 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2356 2357 b->sorted = PETSC_FALSE; 2358 b->roworiented = PETSC_TRUE; 2359 b->nonew = 0; 2360 b->diag = 0; 2361 b->solve_work = 0; 2362 b->mult_work = 0; 2363 B->spptr = 0; 2364 B->info.nz_unneeded = (PetscReal)b->maxnz; 2365 b->keepzeroedrows = PETSC_FALSE; 2366 b->xtoy = 0; 2367 b->XtoY = 0; 2368 2369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2370 "MatStoreValues_SeqBAIJ", 2371 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2373 "MatRetrieveValues_SeqBAIJ", 2374 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2376 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2377 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2379 "MatConvert_SeqBAIJ_SeqAIJ", 2380 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2381 #if !defined(PETSC_USE_64BIT_INT) 2382 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2383 "MatConvert_SeqBAIJ_SeqSBAIJ", 2384 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2385 #endif 2386 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2387 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2388 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 EXTERN_C_END 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2395 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2396 { 2397 Mat C; 2398 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2399 PetscErrorCode ierr; 2400 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2401 2402 PetscFunctionBegin; 2403 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2404 2405 *B = 0; 2406 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2407 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2408 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2409 c = (Mat_SeqBAIJ*)C->data; 2410 2411 C->M = A->M; 2412 C->N = A->N; 2413 c->bs = a->bs; 2414 c->bs2 = a->bs2; 2415 c->mbs = a->mbs; 2416 c->nbs = a->nbs; 2417 2418 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2419 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2420 for (i=0; i<mbs; i++) { 2421 c->imax[i] = a->imax[i]; 2422 c->ilen[i] = a->ilen[i]; 2423 } 2424 2425 /* allocate the matrix space */ 2426 c->singlemalloc = PETSC_TRUE; 2427 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2428 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2429 c->j = (PetscInt*)(c->a + nz*bs2); 2430 c->i = c->j + nz; 2431 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2432 if (mbs > 0) { 2433 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2434 if (cpvalues == MAT_COPY_VALUES) { 2435 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2436 } else { 2437 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2438 } 2439 } 2440 2441 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2442 c->sorted = a->sorted; 2443 c->roworiented = a->roworiented; 2444 c->nonew = a->nonew; 2445 2446 if (a->diag) { 2447 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2448 PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt)); 2449 for (i=0; i<mbs; i++) { 2450 c->diag[i] = a->diag[i]; 2451 } 2452 } else c->diag = 0; 2453 c->nz = a->nz; 2454 c->maxnz = a->maxnz; 2455 c->solve_work = 0; 2456 c->mult_work = 0; 2457 C->preallocated = PETSC_TRUE; 2458 C->assembled = PETSC_TRUE; 2459 *B = C; 2460 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatLoad_SeqBAIJ" 2466 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2467 { 2468 Mat_SeqBAIJ *a; 2469 Mat B; 2470 PetscErrorCode ierr; 2471 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2472 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2473 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2474 PetscInt *masked,nmask,tmp,bs2,ishift; 2475 PetscMPIInt size; 2476 int fd; 2477 PetscScalar *aa; 2478 MPI_Comm comm = ((PetscObject)viewer)->comm; 2479 2480 PetscFunctionBegin; 2481 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2482 bs2 = bs*bs; 2483 2484 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2485 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2486 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2487 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2488 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2489 M = header[1]; N = header[2]; nz = header[3]; 2490 2491 if (header[3] < 0) { 2492 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2493 } 2494 2495 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2496 2497 /* 2498 This code adds extra rows to make sure the number of rows is 2499 divisible by the blocksize 2500 */ 2501 mbs = M/bs; 2502 extra_rows = bs - M + bs*(mbs); 2503 if (extra_rows == bs) extra_rows = 0; 2504 else mbs++; 2505 if (extra_rows) { 2506 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2507 } 2508 2509 /* read in row lengths */ 2510 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2511 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2512 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2513 2514 /* read in column indices */ 2515 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2516 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2517 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2518 2519 /* loop over row lengths determining block row lengths */ 2520 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2521 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2522 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2523 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2524 masked = mask + mbs; 2525 rowcount = 0; nzcount = 0; 2526 for (i=0; i<mbs; i++) { 2527 nmask = 0; 2528 for (j=0; j<bs; j++) { 2529 kmax = rowlengths[rowcount]; 2530 for (k=0; k<kmax; k++) { 2531 tmp = jj[nzcount++]/bs; 2532 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2533 } 2534 rowcount++; 2535 } 2536 browlengths[i] += nmask; 2537 /* zero out the mask elements we set */ 2538 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2539 } 2540 2541 /* create our matrix */ 2542 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2543 ierr = MatSetType(B,type);CHKERRQ(ierr); 2544 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2545 a = (Mat_SeqBAIJ*)B->data; 2546 2547 /* set matrix "i" values */ 2548 a->i[0] = 0; 2549 for (i=1; i<= mbs; i++) { 2550 a->i[i] = a->i[i-1] + browlengths[i-1]; 2551 a->ilen[i-1] = browlengths[i-1]; 2552 } 2553 a->nz = 0; 2554 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2555 2556 /* read in nonzero values */ 2557 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2558 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2559 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2560 2561 /* set "a" and "j" values into matrix */ 2562 nzcount = 0; jcount = 0; 2563 for (i=0; i<mbs; i++) { 2564 nzcountb = nzcount; 2565 nmask = 0; 2566 for (j=0; j<bs; j++) { 2567 kmax = rowlengths[i*bs+j]; 2568 for (k=0; k<kmax; k++) { 2569 tmp = jj[nzcount++]/bs; 2570 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2571 } 2572 } 2573 /* sort the masked values */ 2574 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2575 2576 /* set "j" values into matrix */ 2577 maskcount = 1; 2578 for (j=0; j<nmask; j++) { 2579 a->j[jcount++] = masked[j]; 2580 mask[masked[j]] = maskcount++; 2581 } 2582 /* set "a" values into matrix */ 2583 ishift = bs2*a->i[i]; 2584 for (j=0; j<bs; j++) { 2585 kmax = rowlengths[i*bs+j]; 2586 for (k=0; k<kmax; k++) { 2587 tmp = jj[nzcountb]/bs ; 2588 block = mask[tmp] - 1; 2589 point = jj[nzcountb] - bs*tmp; 2590 idx = ishift + bs2*block + j + bs*point; 2591 a->a[idx] = (MatScalar)aa[nzcountb++]; 2592 } 2593 } 2594 /* zero out the mask elements we set */ 2595 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2596 } 2597 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2598 2599 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2600 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2601 ierr = PetscFree(aa);CHKERRQ(ierr); 2602 ierr = PetscFree(jj);CHKERRQ(ierr); 2603 ierr = PetscFree(mask);CHKERRQ(ierr); 2604 2605 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2606 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2607 ierr = MatView_Private(B);CHKERRQ(ierr); 2608 2609 *A = B; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatCreateSeqBAIJ" 2615 /*@C 2616 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2617 compressed row) format. For good matrix assembly performance the 2618 user should preallocate the matrix storage by setting the parameter nz 2619 (or the array nnz). By setting these parameters accurately, performance 2620 during matrix assembly can be increased by more than a factor of 50. 2621 2622 Collective on MPI_Comm 2623 2624 Input Parameters: 2625 + comm - MPI communicator, set to PETSC_COMM_SELF 2626 . bs - size of block 2627 . m - number of rows 2628 . n - number of columns 2629 . nz - number of nonzero blocks per block row (same for all rows) 2630 - nnz - array containing the number of nonzero blocks in the various block rows 2631 (possibly different for each block row) or PETSC_NULL 2632 2633 Output Parameter: 2634 . A - the matrix 2635 2636 Options Database Keys: 2637 . -mat_no_unroll - uses code that does not unroll the loops in the 2638 block calculations (much slower) 2639 . -mat_block_size - size of the blocks to use 2640 2641 Level: intermediate 2642 2643 Notes: 2644 A nonzero block is any block that as 1 or more nonzeros in it 2645 2646 The block AIJ format is fully compatible with standard Fortran 77 2647 storage. That is, the stored row and column indices can begin at 2648 either one (as in Fortran) or zero. See the users' manual for details. 2649 2650 Specify the preallocated storage with either nz or nnz (not both). 2651 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2652 allocation. For additional details, see the users manual chapter on 2653 matrices. 2654 2655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2656 @*/ 2657 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2658 { 2659 PetscErrorCode ierr; 2660 2661 PetscFunctionBegin; 2662 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2663 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2664 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2665 PetscFunctionReturn(0); 2666 } 2667 2668 #undef __FUNCT__ 2669 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2670 /*@C 2671 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2672 per row in the matrix. For good matrix assembly performance the 2673 user should preallocate the matrix storage by setting the parameter nz 2674 (or the array nnz). By setting these parameters accurately, performance 2675 during matrix assembly can be increased by more than a factor of 50. 2676 2677 Collective on MPI_Comm 2678 2679 Input Parameters: 2680 + A - the matrix 2681 . bs - size of block 2682 . nz - number of block nonzeros per block row (same for all rows) 2683 - nnz - array containing the number of block nonzeros in the various block rows 2684 (possibly different for each block row) or PETSC_NULL 2685 2686 Options Database Keys: 2687 . -mat_no_unroll - uses code that does not unroll the loops in the 2688 block calculations (much slower) 2689 . -mat_block_size - size of the blocks to use 2690 2691 Level: intermediate 2692 2693 Notes: 2694 The block AIJ format is fully compatible with standard Fortran 77 2695 storage. That is, the stored row and column indices can begin at 2696 either one (as in Fortran) or zero. See the users' manual for details. 2697 2698 Specify the preallocated storage with either nz or nnz (not both). 2699 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2700 allocation. For additional details, see the users manual chapter on 2701 matrices. 2702 2703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2704 @*/ 2705 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2706 { 2707 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2708 2709 PetscFunctionBegin; 2710 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2711 if (f) { 2712 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2713 } 2714 PetscFunctionReturn(0); 2715 } 2716 2717