1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for Fun3d sequential benchmark 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 stepval = (n-1)*4; 590 for (k=0; k<m; k++) { /* loop over added rows */ 591 row = im[k]; 592 rp = aj + ai[row]; 593 ap = aa + 16*ai[row]; 594 nrow = ailen[row]; 595 low = 0; 596 for (l=0; l<n; l++) { /* loop over added columns */ 597 col = in[l]; 598 value = v + k*(stepval+4)*4 + l*4; 599 low = 0; high = nrow; 600 while (high-low > 7) { 601 t = (low+high)/2; 602 if (rp[t] > col) high = t; 603 else low = t; 604 } 605 for (i=low; i<high; i++) { 606 if (rp[i] > col) break; 607 if (rp[i] == col) { 608 bap = ap + 16*i; 609 for (ii=0; ii<4; ii++,value+=stepval) { 610 for (jj=ii; jj<16; jj+=4) { 611 bap[jj] += *value++; 612 } 613 } 614 goto noinsert2; 615 } 616 } 617 N = nrow++ - 1; 618 /* shift up all the later entries in this row */ 619 for (ii=N; ii>=i; ii--) { 620 rp[ii+1] = rp[ii]; 621 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 622 } 623 if (N >= i) { 624 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 625 } 626 rp[i] = col; 627 bap = ap + 16*i; 628 for (ii=0; ii<4; ii++,value+=stepval) { 629 for (jj=ii; jj<16; jj+=4) { 630 bap[jj] = *value++; 631 } 632 } 633 noinsert2:; 634 low = i; 635 } 636 ailen[row] = nrow; 637 } 638 PetscFunctionReturnVoid(); 639 } 640 EXTERN_C_END 641 642 #if defined(PETSC_HAVE_FORTRAN_CAPS) 643 #define matsetvalues4_ MATSETVALUES4 644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 645 #define matsetvalues4_ matsetvalues4 646 #endif 647 648 EXTERN_C_BEGIN 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSetValues4_" 651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 652 { 653 Mat A = *AA; 654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 655 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 656 PetscInt *ai=a->i,*ailen=a->ilen; 657 PetscInt *aj=a->j,brow,bcol; 658 PetscInt ridx,cidx; 659 MatScalar *ap,value,*aa=a->a,*bap; 660 661 PetscFunctionBegin; 662 for (k=0; k<m; k++) { /* loop over added rows */ 663 row = im[k]; brow = row/4; 664 rp = aj + ai[brow]; 665 ap = aa + 16*ai[brow]; 666 nrow = ailen[brow]; 667 low = 0; 668 for (l=0; l<n; l++) { /* loop over added columns */ 669 col = in[l]; bcol = col/4; 670 ridx = row % 4; cidx = col % 4; 671 value = v[l + k*n]; 672 low = 0; high = nrow; 673 while (high-low > 7) { 674 t = (low+high)/2; 675 if (rp[t] > bcol) high = t; 676 else low = t; 677 } 678 for (i=low; i<high; i++) { 679 if (rp[i] > bcol) break; 680 if (rp[i] == bcol) { 681 bap = ap + 16*i + 4*cidx + ridx; 682 *bap += value; 683 goto noinsert1; 684 } 685 } 686 N = nrow++ - 1; 687 /* shift up all the later entries in this row */ 688 for (ii=N; ii>=i; ii--) { 689 rp[ii+1] = rp[ii]; 690 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 691 } 692 if (N>=i) { 693 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 694 } 695 rp[i] = bcol; 696 ap[16*i + 4*cidx + ridx] = value; 697 noinsert1:; 698 low = i; 699 } 700 ailen[brow] = nrow; 701 } 702 PetscFunctionReturnVoid(); 703 } 704 EXTERN_C_END 705 706 /* UGLY, ugly, ugly 707 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 708 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 709 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 710 converts the entries into single precision and then calls ..._MatScalar() to put them 711 into the single precision data structures. 712 */ 713 #if defined(PETSC_USE_MAT_SINGLE) 714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 715 #else 716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 717 #endif 718 719 #define CHUNKSIZE 10 720 721 /* 722 Checks for missing diagonals 723 */ 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 727 { 728 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 729 PetscErrorCode ierr; 730 PetscInt *diag,*jj = a->j,i; 731 732 PetscFunctionBegin; 733 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 734 diag = a->diag; 735 for (i=0; i<a->mbs; i++) { 736 if (jj[diag[i]] != i) { 737 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 738 } 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 746 { 747 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscInt i,j,*diag,m = a->mbs; 750 751 PetscFunctionBegin; 752 if (a->diag) PetscFunctionReturn(0); 753 754 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 755 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 756 for (i=0; i<m; i++) { 757 diag[i] = a->i[i+1]; 758 for (j=a->i[i]; j<a->i[i+1]; j++) { 759 if (a->j[j] == i) { 760 diag[i] = j; 761 break; 762 } 763 } 764 } 765 a->diag = diag; 766 PetscFunctionReturn(0); 767 } 768 769 770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 775 { 776 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 777 PetscErrorCode ierr; 778 PetscInt n = a->mbs,i; 779 780 PetscFunctionBegin; 781 *nn = n; 782 if (!ia) PetscFunctionReturn(0); 783 if (symmetric) { 784 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 785 } else if (oshift == 1) { 786 /* temporarily add 1 to i and j indices */ 787 PetscInt nz = a->i[n]; 788 for (i=0; i<nz; i++) a->j[i]++; 789 for (i=0; i<n+1; i++) a->i[i]++; 790 *ia = a->i; *ja = a->j; 791 } else { 792 *ia = a->i; *ja = a->j; 793 } 794 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 801 { 802 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 803 PetscErrorCode ierr; 804 PetscInt i,n = a->mbs; 805 806 PetscFunctionBegin; 807 if (!ia) PetscFunctionReturn(0); 808 if (symmetric) { 809 ierr = PetscFree(*ia);CHKERRQ(ierr); 810 ierr = PetscFree(*ja);CHKERRQ(ierr); 811 } else if (oshift == 1) { 812 PetscInt nz = a->i[n]-1; 813 for (i=0; i<nz; i++) a->j[i]--; 814 for (i=0; i<n+1; i++) a->i[i]--; 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatDestroy_SeqBAIJ" 821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 822 { 823 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 #if defined(PETSC_USE_LOG) 828 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 829 #endif 830 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); 831 if (a->row) { 832 ierr = ISDestroy(a->row);CHKERRQ(ierr); 833 } 834 if (a->col) { 835 ierr = ISDestroy(a->col);CHKERRQ(ierr); 836 } 837 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 838 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 839 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 840 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 841 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 842 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 843 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 844 #if defined(PETSC_USE_MAT_SINGLE) 845 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 846 #endif 847 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 848 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 849 850 ierr = PetscFree(a);CHKERRQ(ierr); 851 852 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 853 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 854 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 856 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 857 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatSetOption_SeqBAIJ" 864 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 865 { 866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 867 PetscErrorCode ierr; 868 869 PetscFunctionBegin; 870 switch (op) { 871 case MAT_ROW_ORIENTED: 872 a->roworiented = PETSC_TRUE; 873 break; 874 case MAT_COLUMN_ORIENTED: 875 a->roworiented = PETSC_FALSE; 876 break; 877 case MAT_COLUMNS_SORTED: 878 a->sorted = PETSC_TRUE; 879 break; 880 case MAT_COLUMNS_UNSORTED: 881 a->sorted = PETSC_FALSE; 882 break; 883 case MAT_KEEP_ZEROED_ROWS: 884 a->keepzeroedrows = PETSC_TRUE; 885 break; 886 case MAT_NO_NEW_NONZERO_LOCATIONS: 887 a->nonew = 1; 888 break; 889 case MAT_NEW_NONZERO_LOCATION_ERR: 890 a->nonew = -1; 891 break; 892 case MAT_NEW_NONZERO_ALLOCATION_ERR: 893 a->nonew = -2; 894 break; 895 case MAT_YES_NEW_NONZERO_LOCATIONS: 896 a->nonew = 0; 897 break; 898 case MAT_ROWS_SORTED: 899 case MAT_ROWS_UNSORTED: 900 case MAT_YES_NEW_DIAGONALS: 901 case MAT_IGNORE_OFF_PROC_ENTRIES: 902 case MAT_USE_HASH_TABLE: 903 ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 904 break; 905 case MAT_NO_NEW_DIAGONALS: 906 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 907 case MAT_SYMMETRIC: 908 case MAT_STRUCTURALLY_SYMMETRIC: 909 case MAT_NOT_SYMMETRIC: 910 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 911 case MAT_HERMITIAN: 912 case MAT_NOT_HERMITIAN: 913 case MAT_SYMMETRY_ETERNAL: 914 case MAT_NOT_SYMMETRY_ETERNAL: 915 break; 916 default: 917 SETERRQ(PETSC_ERR_SUP,"unknown option"); 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatGetRow_SeqBAIJ" 924 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 925 { 926 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 927 PetscErrorCode ierr; 928 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 929 MatScalar *aa,*aa_i; 930 PetscScalar *v_i; 931 932 PetscFunctionBegin; 933 bs = A->bs; 934 ai = a->i; 935 aj = a->j; 936 aa = a->a; 937 bs2 = a->bs2; 938 939 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 940 941 bn = row/bs; /* Block number */ 942 bp = row % bs; /* Block Position */ 943 M = ai[bn+1] - ai[bn]; 944 *nz = bs*M; 945 946 if (v) { 947 *v = 0; 948 if (*nz) { 949 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 950 for (i=0; i<M; i++) { /* for each block in the block row */ 951 v_i = *v + i*bs; 952 aa_i = aa + bs2*(ai[bn] + i); 953 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 954 } 955 } 956 } 957 958 if (idx) { 959 *idx = 0; 960 if (*nz) { 961 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 962 for (i=0; i<M; i++) { /* for each block in the block row */ 963 idx_i = *idx + i*bs; 964 itmp = bs*aj[ai[bn] + i]; 965 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 966 } 967 } 968 } 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 974 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 975 { 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 980 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatTranspose_SeqBAIJ" 986 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 987 { 988 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 989 Mat C; 990 PetscErrorCode ierr; 991 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 992 PetscInt *rows,*cols,bs2=a->bs2; 993 PetscScalar *array; 994 995 PetscFunctionBegin; 996 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 997 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 998 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 999 1000 #if defined(PETSC_USE_MAT_SINGLE) 1001 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1002 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1003 #else 1004 array = a->a; 1005 #endif 1006 1007 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1008 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 1009 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1010 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1011 ierr = PetscFree(col);CHKERRQ(ierr); 1012 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1013 cols = rows + bs; 1014 for (i=0; i<mbs; i++) { 1015 cols[0] = i*bs; 1016 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1017 len = ai[i+1] - ai[i]; 1018 for (j=0; j<len; j++) { 1019 rows[0] = (*aj++)*bs; 1020 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1021 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1022 array += bs2; 1023 } 1024 } 1025 ierr = PetscFree(rows);CHKERRQ(ierr); 1026 #if defined(PETSC_USE_MAT_SINGLE) 1027 ierr = PetscFree(array); 1028 #endif 1029 1030 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1031 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1032 1033 if (B) { 1034 *B = C; 1035 } else { 1036 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1037 } 1038 PetscFunctionReturn(0); 1039 } 1040 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1043 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1044 { 1045 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1046 PetscErrorCode ierr; 1047 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1048 int fd; 1049 PetscScalar *aa; 1050 FILE *file; 1051 1052 PetscFunctionBegin; 1053 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1054 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1055 col_lens[0] = MAT_FILE_COOKIE; 1056 1057 col_lens[1] = A->m; 1058 col_lens[2] = A->n; 1059 col_lens[3] = a->nz*bs2; 1060 1061 /* store lengths of each row and write (including header) to file */ 1062 count = 0; 1063 for (i=0; i<a->mbs; i++) { 1064 for (j=0; j<bs; j++) { 1065 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1066 } 1067 } 1068 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1069 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1070 1071 /* store column indices (zero start index) */ 1072 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1073 count = 0; 1074 for (i=0; i<a->mbs; i++) { 1075 for (j=0; j<bs; j++) { 1076 for (k=a->i[i]; k<a->i[i+1]; k++) { 1077 for (l=0; l<bs; l++) { 1078 jj[count++] = bs*a->j[k] + l; 1079 } 1080 } 1081 } 1082 } 1083 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1084 ierr = PetscFree(jj);CHKERRQ(ierr); 1085 1086 /* store nonzero values */ 1087 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1088 count = 0; 1089 for (i=0; i<a->mbs; i++) { 1090 for (j=0; j<bs; j++) { 1091 for (k=a->i[i]; k<a->i[i+1]; k++) { 1092 for (l=0; l<bs; l++) { 1093 aa[count++] = a->a[bs2*k + l*bs + j]; 1094 } 1095 } 1096 } 1097 } 1098 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1099 ierr = PetscFree(aa);CHKERRQ(ierr); 1100 1101 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1102 if (file) { 1103 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1104 } 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1110 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1111 { 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1115 PetscViewerFormat format; 1116 1117 PetscFunctionBegin; 1118 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1119 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1120 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1121 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1122 Mat aij; 1123 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1124 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1125 ierr = MatDestroy(aij);CHKERRQ(ierr); 1126 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1127 PetscFunctionReturn(0); 1128 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1129 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1130 for (i=0; i<a->mbs; i++) { 1131 for (j=0; j<bs; j++) { 1132 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1133 for (k=a->i[i]; k<a->i[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 #if defined(PETSC_USE_COMPLEX) 1136 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1137 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1138 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1139 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1140 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1141 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1142 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1143 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1144 } 1145 #else 1146 if (a->a[bs2*k + l*bs + j] != 0.0) { 1147 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1148 } 1149 #endif 1150 } 1151 } 1152 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1153 } 1154 } 1155 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1156 } else { 1157 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1158 for (i=0; i<a->mbs; i++) { 1159 for (j=0; j<bs; j++) { 1160 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1161 for (k=a->i[i]; k<a->i[i+1]; k++) { 1162 for (l=0; l<bs; l++) { 1163 #if defined(PETSC_USE_COMPLEX) 1164 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1165 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1166 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1167 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1168 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1169 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1170 } else { 1171 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1172 } 1173 #else 1174 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1175 #endif 1176 } 1177 } 1178 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1179 } 1180 } 1181 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1182 } 1183 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1189 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1190 { 1191 Mat A = (Mat) Aa; 1192 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1195 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1196 MatScalar *aa; 1197 PetscViewer viewer; 1198 1199 PetscFunctionBegin; 1200 1201 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1202 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1203 1204 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1205 1206 /* loop over matrix elements drawing boxes */ 1207 color = PETSC_DRAW_BLUE; 1208 for (i=0,row=0; i<mbs; i++,row+=bs) { 1209 for (j=a->i[i]; j<a->i[i+1]; j++) { 1210 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1211 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1212 aa = a->a + j*bs2; 1213 for (k=0; k<bs; k++) { 1214 for (l=0; l<bs; l++) { 1215 if (PetscRealPart(*aa++) >= 0.) continue; 1216 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1217 } 1218 } 1219 } 1220 } 1221 color = PETSC_DRAW_CYAN; 1222 for (i=0,row=0; i<mbs; i++,row+=bs) { 1223 for (j=a->i[i]; j<a->i[i+1]; j++) { 1224 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1225 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1226 aa = a->a + j*bs2; 1227 for (k=0; k<bs; k++) { 1228 for (l=0; l<bs; l++) { 1229 if (PetscRealPart(*aa++) != 0.) continue; 1230 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1231 } 1232 } 1233 } 1234 } 1235 1236 color = PETSC_DRAW_RED; 1237 for (i=0,row=0; i<mbs; i++,row+=bs) { 1238 for (j=a->i[i]; j<a->i[i+1]; j++) { 1239 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1241 aa = a->a + j*bs2; 1242 for (k=0; k<bs; k++) { 1243 for (l=0; l<bs; l++) { 1244 if (PetscRealPart(*aa++) <= 0.) continue; 1245 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1246 } 1247 } 1248 } 1249 } 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1255 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1256 { 1257 PetscErrorCode ierr; 1258 PetscReal xl,yl,xr,yr,w,h; 1259 PetscDraw draw; 1260 PetscTruth isnull; 1261 1262 PetscFunctionBegin; 1263 1264 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1265 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1266 1267 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1268 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1269 xr += w; yr += h; xl = -w; yl = -h; 1270 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1271 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1272 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatView_SeqBAIJ" 1278 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1279 { 1280 PetscErrorCode ierr; 1281 PetscTruth iascii,isbinary,isdraw; 1282 1283 PetscFunctionBegin; 1284 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1285 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1286 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1287 if (iascii){ 1288 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1289 } else if (isbinary) { 1290 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1291 } else if (isdraw) { 1292 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1293 } else { 1294 Mat B; 1295 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1296 ierr = MatView(B,viewer);CHKERRQ(ierr); 1297 ierr = MatDestroy(B);CHKERRQ(ierr); 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1305 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1306 { 1307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1308 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1309 PetscInt *ai = a->i,*ailen = a->ilen; 1310 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1311 MatScalar *ap,*aa = a->a,zero = 0.0; 1312 1313 PetscFunctionBegin; 1314 for (k=0; k<m; k++) { /* loop over rows */ 1315 row = im[k]; brow = row/bs; 1316 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1317 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1318 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1319 nrow = ailen[brow]; 1320 for (l=0; l<n; l++) { /* loop over columns */ 1321 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1322 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1323 col = in[l] ; 1324 bcol = col/bs; 1325 cidx = col%bs; 1326 ridx = row%bs; 1327 high = nrow; 1328 low = 0; /* assume unsorted */ 1329 while (high-low > 5) { 1330 t = (low+high)/2; 1331 if (rp[t] > bcol) high = t; 1332 else low = t; 1333 } 1334 for (i=low; i<high; i++) { 1335 if (rp[i] > bcol) break; 1336 if (rp[i] == bcol) { 1337 *v++ = ap[bs2*i+bs*cidx+ridx]; 1338 goto finished; 1339 } 1340 } 1341 *v++ = zero; 1342 finished:; 1343 } 1344 } 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #if defined(PETSC_USE_MAT_SINGLE) 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1351 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1352 { 1353 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1354 PetscErrorCode ierr; 1355 PetscInt i,N = m*n*b->bs2; 1356 MatScalar *vsingle; 1357 1358 PetscFunctionBegin; 1359 if (N > b->setvalueslen) { 1360 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1361 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1362 b->setvalueslen = N; 1363 } 1364 vsingle = b->setvaluescopy; 1365 for (i=0; i<N; i++) { 1366 vsingle[i] = v[i]; 1367 } 1368 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 #endif 1372 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1376 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1377 { 1378 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1379 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1380 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1381 PetscErrorCode ierr; 1382 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1383 PetscTruth roworiented=a->roworiented; 1384 const MatScalar *value = v; 1385 MatScalar *ap,*aa = a->a,*bap; 1386 1387 PetscFunctionBegin; 1388 if (roworiented) { 1389 stepval = (n-1)*bs; 1390 } else { 1391 stepval = (m-1)*bs; 1392 } 1393 for (k=0; k<m; k++) { /* loop over added rows */ 1394 row = im[k]; 1395 if (row < 0) continue; 1396 #if defined(PETSC_USE_DEBUG) 1397 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1398 #endif 1399 rp = aj + ai[row]; 1400 ap = aa + bs2*ai[row]; 1401 rmax = imax[row]; 1402 nrow = ailen[row]; 1403 low = 0; 1404 high = nrow; 1405 for (l=0; l<n; l++) { /* loop over added columns */ 1406 if (in[l] < 0) continue; 1407 #if defined(PETSC_USE_DEBUG) 1408 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1409 #endif 1410 col = in[l]; 1411 if (roworiented) { 1412 value = v + k*(stepval+bs)*bs + l*bs; 1413 } else { 1414 value = v + l*(stepval+bs)*bs + k*bs; 1415 } 1416 if (col < lastcol) low = 0; else high = nrow; 1417 lastcol = col; 1418 while (high-low > 7) { 1419 t = (low+high)/2; 1420 if (rp[t] > col) high = t; 1421 else low = t; 1422 } 1423 for (i=low; i<high; i++) { 1424 if (rp[i] > col) break; 1425 if (rp[i] == col) { 1426 bap = ap + bs2*i; 1427 if (roworiented) { 1428 if (is == ADD_VALUES) { 1429 for (ii=0; ii<bs; ii++,value+=stepval) { 1430 for (jj=ii; jj<bs2; jj+=bs) { 1431 bap[jj] += *value++; 1432 } 1433 } 1434 } else { 1435 for (ii=0; ii<bs; ii++,value+=stepval) { 1436 for (jj=ii; jj<bs2; jj+=bs) { 1437 bap[jj] = *value++; 1438 } 1439 } 1440 } 1441 } else { 1442 if (is == ADD_VALUES) { 1443 for (ii=0; ii<bs; ii++,value+=stepval) { 1444 for (jj=0; jj<bs; jj++) { 1445 *bap++ += *value++; 1446 } 1447 } 1448 } else { 1449 for (ii=0; ii<bs; ii++,value+=stepval) { 1450 for (jj=0; jj<bs; jj++) { 1451 *bap++ = *value++; 1452 } 1453 } 1454 } 1455 } 1456 goto noinsert2; 1457 } 1458 } 1459 if (nonew == 1) goto noinsert2; 1460 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1461 if (nrow >= rmax) { 1462 /* there is no extra room in row, therefore enlarge */ 1463 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1464 MatScalar *new_a; 1465 1466 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1467 1468 /* malloc new storage space */ 1469 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 1470 1471 /* copy over old data into new slots */ 1472 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1473 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1474 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1475 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1476 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1477 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1478 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1479 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1480 /* free up old matrix storage */ 1481 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);\ 1482 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1483 a->singlemalloc = PETSC_TRUE; 1484 1485 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1486 rmax = imax[row] = imax[row] + CHUNKSIZE; 1487 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1488 a->maxnz += bs2*CHUNKSIZE; 1489 a->reallocs++; 1490 a->nz++; 1491 } 1492 N = nrow++ - 1; 1493 /* shift up all the later entries in this row */ 1494 for (ii=N; ii>=i; ii--) { 1495 rp[ii+1] = rp[ii]; 1496 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1497 } 1498 if (N >= i) { 1499 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1500 } 1501 rp[i] = col; 1502 bap = ap + bs2*i; 1503 if (roworiented) { 1504 for (ii=0; ii<bs; ii++,value+=stepval) { 1505 for (jj=ii; jj<bs2; jj+=bs) { 1506 bap[jj] = *value++; 1507 } 1508 } 1509 } else { 1510 for (ii=0; ii<bs; ii++,value+=stepval) { 1511 for (jj=0; jj<bs; jj++) { 1512 *bap++ = *value++; 1513 } 1514 } 1515 } 1516 noinsert2:; 1517 low = i; 1518 } 1519 ailen[row] = nrow; 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1526 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1527 { 1528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1529 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1530 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1531 PetscErrorCode ierr; 1532 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1533 MatScalar *aa = a->a,*ap; 1534 PetscReal ratio=0.6; 1535 1536 PetscFunctionBegin; 1537 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1538 1539 if (m) rmax = ailen[0]; 1540 for (i=1; i<mbs; i++) { 1541 /* move each row back by the amount of empty slots (fshift) before it*/ 1542 fshift += imax[i-1] - ailen[i-1]; 1543 rmax = PetscMax(rmax,ailen[i]); 1544 if (fshift) { 1545 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1546 N = ailen[i]; 1547 for (j=0; j<N; j++) { 1548 ip[j-fshift] = ip[j]; 1549 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1550 } 1551 } 1552 ai[i] = ai[i-1] + ailen[i-1]; 1553 } 1554 if (mbs) { 1555 fshift += imax[mbs-1] - ailen[mbs-1]; 1556 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1557 } 1558 /* reset ilen and imax for each row */ 1559 for (i=0; i<mbs; i++) { 1560 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1561 } 1562 a->nz = ai[mbs]; 1563 1564 /* diagonals may have moved, so kill the diagonal pointers */ 1565 a->idiagvalid = PETSC_FALSE; 1566 if (fshift && a->diag) { 1567 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1568 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1569 a->diag = 0; 1570 } 1571 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1572 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1573 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1574 a->reallocs = 0; 1575 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1576 1577 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1578 if (a->compressedrow.use){ 1579 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1580 } 1581 1582 A->same_nonzero = PETSC_TRUE; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 /* 1587 This function returns an array of flags which indicate the locations of contiguous 1588 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1589 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1590 Assume: sizes should be long enough to hold all the values. 1591 */ 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1594 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1595 { 1596 PetscInt i,j,k,row; 1597 PetscTruth flg; 1598 1599 PetscFunctionBegin; 1600 for (i=0,j=0; i<n; j++) { 1601 row = idx[i]; 1602 if (row%bs!=0) { /* Not the begining of a block */ 1603 sizes[j] = 1; 1604 i++; 1605 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1606 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1607 i++; 1608 } else { /* Begining of the block, so check if the complete block exists */ 1609 flg = PETSC_TRUE; 1610 for (k=1; k<bs; k++) { 1611 if (row+k != idx[i+k]) { /* break in the block */ 1612 flg = PETSC_FALSE; 1613 break; 1614 } 1615 } 1616 if (flg) { /* No break in the bs */ 1617 sizes[j] = bs; 1618 i+= bs; 1619 } else { 1620 sizes[j] = 1; 1621 i++; 1622 } 1623 } 1624 } 1625 *bs_max = j; 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1631 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1632 { 1633 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1634 PetscErrorCode ierr; 1635 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1636 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1637 PetscScalar zero = 0.0; 1638 MatScalar *aa; 1639 1640 PetscFunctionBegin; 1641 /* Make a copy of the IS and sort it */ 1642 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1643 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1644 1645 /* allocate memory for rows,sizes */ 1646 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1647 sizes = rows + is_n; 1648 1649 /* copy IS values to rows, and sort them */ 1650 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1651 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1652 if (baij->keepzeroedrows) { 1653 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1654 bs_max = is_n; 1655 A->same_nonzero = PETSC_TRUE; 1656 } else { 1657 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1658 A->same_nonzero = PETSC_FALSE; 1659 } 1660 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1661 1662 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1663 row = rows[j]; 1664 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1665 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1666 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1667 if (sizes[i] == bs && !baij->keepzeroedrows) { 1668 if (diag) { 1669 if (baij->ilen[row/bs] > 0) { 1670 baij->ilen[row/bs] = 1; 1671 baij->j[baij->i[row/bs]] = row/bs; 1672 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1673 } 1674 /* Now insert all the diagonal values for this bs */ 1675 for (k=0; k<bs; k++) { 1676 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1677 } 1678 } else { /* (!diag) */ 1679 baij->ilen[row/bs] = 0; 1680 } /* end (!diag) */ 1681 } else { /* (sizes[i] != bs) */ 1682 #if defined (PETSC_USE_DEBUG) 1683 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1684 #endif 1685 for (k=0; k<count; k++) { 1686 aa[0] = zero; 1687 aa += bs; 1688 } 1689 if (diag) { 1690 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1691 } 1692 } 1693 } 1694 1695 ierr = PetscFree(rows);CHKERRQ(ierr); 1696 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1702 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1703 { 1704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1705 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1706 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1707 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1708 PetscErrorCode ierr; 1709 PetscInt ridx,cidx,bs2=a->bs2; 1710 PetscTruth roworiented=a->roworiented; 1711 MatScalar *ap,value,*aa=a->a,*bap; 1712 1713 PetscFunctionBegin; 1714 for (k=0; k<m; k++) { /* loop over added rows */ 1715 row = im[k]; brow = row/bs; 1716 if (row < 0) continue; 1717 #if defined(PETSC_USE_DEBUG) 1718 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1719 #endif 1720 rp = aj + ai[brow]; 1721 ap = aa + bs2*ai[brow]; 1722 rmax = imax[brow]; 1723 nrow = ailen[brow]; 1724 low = 0; 1725 high = nrow; 1726 for (l=0; l<n; l++) { /* loop over added columns */ 1727 if (in[l] < 0) continue; 1728 #if defined(PETSC_USE_DEBUG) 1729 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1730 #endif 1731 col = in[l]; bcol = col/bs; 1732 ridx = row % bs; cidx = col % bs; 1733 if (roworiented) { 1734 value = v[l + k*n]; 1735 } else { 1736 value = v[k + l*m]; 1737 } 1738 if (col < lastcol) low = 0; else high = nrow; 1739 lastcol = col; 1740 while (high-low > 7) { 1741 t = (low+high)/2; 1742 if (rp[t] > bcol) high = t; 1743 else low = t; 1744 } 1745 for (i=low; i<high; i++) { 1746 if (rp[i] > bcol) break; 1747 if (rp[i] == bcol) { 1748 bap = ap + bs2*i + bs*cidx + ridx; 1749 if (is == ADD_VALUES) *bap += value; 1750 else *bap = value; 1751 goto noinsert1; 1752 } 1753 } 1754 if (nonew == 1) goto noinsert1; 1755 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1756 if (nrow >= rmax) { 1757 /* there is no extra room in row, therefore enlarge */ 1758 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1759 MatScalar *new_a; 1760 1761 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1762 1763 /* Malloc new storage space */ 1764 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 1765 1766 /* copy over old data into new slots */ 1767 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1768 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1769 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1770 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1771 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1772 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1773 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1774 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1775 /* free up old matrix storage */ 1776 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); 1777 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1778 a->singlemalloc = PETSC_TRUE; 1779 1780 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1781 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1782 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1783 a->maxnz += bs2*CHUNKSIZE; 1784 a->reallocs++; 1785 a->nz++; 1786 } 1787 N = nrow++ - 1; 1788 /* shift up all the later entries in this row */ 1789 for (ii=N; ii>=i; ii--) { 1790 rp[ii+1] = rp[ii]; 1791 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1792 } 1793 if (N>=i) { 1794 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1795 } 1796 rp[i] = bcol; 1797 ap[bs2*i + bs*cidx + ridx] = value; 1798 noinsert1:; 1799 low = i; 1800 } 1801 ailen[brow] = nrow; 1802 } 1803 A->same_nonzero = PETSC_FALSE; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1810 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1811 { 1812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1813 Mat outA; 1814 PetscErrorCode ierr; 1815 PetscTruth row_identity,col_identity; 1816 1817 PetscFunctionBegin; 1818 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1819 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1820 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1821 if (!row_identity || !col_identity) { 1822 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1823 } 1824 1825 outA = inA; 1826 inA->factor = FACTOR_LU; 1827 1828 if (!a->diag) { 1829 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1830 } 1831 1832 a->row = row; 1833 a->col = col; 1834 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1835 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1836 1837 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1838 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1839 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1840 1841 /* 1842 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1843 for ILU(0) factorization with natural ordering 1844 */ 1845 if (inA->bs < 8) { 1846 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1847 } else { 1848 if (!a->solve_work) { 1849 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1850 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1851 } 1852 } 1853 1854 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1855 1856 PetscFunctionReturn(0); 1857 } 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1860 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1861 { 1862 static PetscTruth called = PETSC_FALSE; 1863 MPI_Comm comm = A->comm; 1864 PetscErrorCode ierr; 1865 1866 PetscFunctionBegin; 1867 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1868 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1869 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 EXTERN_C_BEGIN 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1876 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1877 { 1878 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1879 PetscInt i,nz,nbs; 1880 1881 PetscFunctionBegin; 1882 nz = baij->maxnz/baij->bs2; 1883 nbs = baij->nbs; 1884 for (i=0; i<nz; i++) { 1885 baij->j[i] = indices[i]; 1886 } 1887 baij->nz = nz; 1888 for (i=0; i<nbs; i++) { 1889 baij->ilen[i] = baij->imax[i]; 1890 } 1891 1892 PetscFunctionReturn(0); 1893 } 1894 EXTERN_C_END 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1898 /*@ 1899 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1900 in the matrix. 1901 1902 Input Parameters: 1903 + mat - the SeqBAIJ matrix 1904 - indices - the column indices 1905 1906 Level: advanced 1907 1908 Notes: 1909 This can be called if you have precomputed the nonzero structure of the 1910 matrix and want to provide it to the matrix object to improve the performance 1911 of the MatSetValues() operation. 1912 1913 You MUST have set the correct numbers of nonzeros per row in the call to 1914 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1915 1916 MUST be called before any calls to MatSetValues(); 1917 1918 @*/ 1919 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1920 { 1921 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1922 1923 PetscFunctionBegin; 1924 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1925 PetscValidPointer(indices,2); 1926 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1927 if (f) { 1928 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1929 } else { 1930 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1937 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1938 { 1939 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1940 PetscErrorCode ierr; 1941 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1942 PetscReal atmp; 1943 PetscScalar *x,zero = 0.0; 1944 MatScalar *aa; 1945 PetscInt ncols,brow,krow,kcol; 1946 1947 PetscFunctionBegin; 1948 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1949 bs = A->bs; 1950 aa = a->a; 1951 ai = a->i; 1952 aj = a->j; 1953 mbs = a->mbs; 1954 1955 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1956 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1957 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1958 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1959 for (i=0; i<mbs; i++) { 1960 ncols = ai[1] - ai[0]; ai++; 1961 brow = bs*i; 1962 for (j=0; j<ncols; j++){ 1963 /* bcol = bs*(*aj); */ 1964 for (kcol=0; kcol<bs; kcol++){ 1965 for (krow=0; krow<bs; krow++){ 1966 atmp = PetscAbsScalar(*aa); aa++; 1967 row = brow + krow; /* row index */ 1968 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1969 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1970 } 1971 } 1972 aj++; 1973 } 1974 } 1975 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1981 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1982 { 1983 PetscErrorCode ierr; 1984 1985 PetscFunctionBegin; 1986 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1992 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1993 { 1994 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1995 PetscFunctionBegin; 1996 *array = a->a; 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2002 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2003 { 2004 PetscFunctionBegin; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #include "petscblaslapack.h" 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2011 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 2012 { 2013 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,bs=Y->bs,j,bs2; 2016 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 2017 2018 PetscFunctionBegin; 2019 if (str == SAME_NONZERO_PATTERN) { 2020 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2021 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2022 if (y->xtoy && y->XtoY != X) { 2023 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2024 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2025 } 2026 if (!y->xtoy) { /* get xtoy */ 2027 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2028 y->XtoY = X; 2029 } 2030 bs2 = bs*bs; 2031 for (i=0; i<x->nz; i++) { 2032 j = 0; 2033 while (j < bs2){ 2034 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2035 j++; 2036 } 2037 } 2038 ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2039 } else { 2040 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2041 } 2042 PetscFunctionReturn(0); 2043 } 2044 2045 /* -------------------------------------------------------------------*/ 2046 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2047 MatGetRow_SeqBAIJ, 2048 MatRestoreRow_SeqBAIJ, 2049 MatMult_SeqBAIJ_N, 2050 /* 4*/ MatMultAdd_SeqBAIJ_N, 2051 MatMultTranspose_SeqBAIJ, 2052 MatMultTransposeAdd_SeqBAIJ, 2053 MatSolve_SeqBAIJ_N, 2054 0, 2055 0, 2056 /*10*/ 0, 2057 MatLUFactor_SeqBAIJ, 2058 0, 2059 0, 2060 MatTranspose_SeqBAIJ, 2061 /*15*/ MatGetInfo_SeqBAIJ, 2062 MatEqual_SeqBAIJ, 2063 MatGetDiagonal_SeqBAIJ, 2064 MatDiagonalScale_SeqBAIJ, 2065 MatNorm_SeqBAIJ, 2066 /*20*/ 0, 2067 MatAssemblyEnd_SeqBAIJ, 2068 0, 2069 MatSetOption_SeqBAIJ, 2070 MatZeroEntries_SeqBAIJ, 2071 /*25*/ MatZeroRows_SeqBAIJ, 2072 MatLUFactorSymbolic_SeqBAIJ, 2073 MatLUFactorNumeric_SeqBAIJ_N, 2074 MatCholeskyFactorSymbolic_SeqBAIJ, 2075 MatCholeskyFactorNumeric_SeqBAIJ_N, 2076 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2077 MatILUFactorSymbolic_SeqBAIJ, 2078 MatICCFactorSymbolic_SeqBAIJ, 2079 MatGetArray_SeqBAIJ, 2080 MatRestoreArray_SeqBAIJ, 2081 /*35*/ MatDuplicate_SeqBAIJ, 2082 0, 2083 0, 2084 MatILUFactor_SeqBAIJ, 2085 0, 2086 /*40*/ MatAXPY_SeqBAIJ, 2087 MatGetSubMatrices_SeqBAIJ, 2088 MatIncreaseOverlap_SeqBAIJ, 2089 MatGetValues_SeqBAIJ, 2090 0, 2091 /*45*/ MatPrintHelp_SeqBAIJ, 2092 MatScale_SeqBAIJ, 2093 0, 2094 0, 2095 0, 2096 /*50*/ 0, 2097 MatGetRowIJ_SeqBAIJ, 2098 MatRestoreRowIJ_SeqBAIJ, 2099 0, 2100 0, 2101 /*55*/ 0, 2102 0, 2103 0, 2104 0, 2105 MatSetValuesBlocked_SeqBAIJ, 2106 /*60*/ MatGetSubMatrix_SeqBAIJ, 2107 MatDestroy_SeqBAIJ, 2108 MatView_SeqBAIJ, 2109 MatGetPetscMaps_Petsc, 2110 0, 2111 /*65*/ 0, 2112 0, 2113 0, 2114 0, 2115 0, 2116 /*70*/ MatGetRowMax_SeqBAIJ, 2117 MatConvert_Basic, 2118 0, 2119 0, 2120 0, 2121 /*75*/ 0, 2122 0, 2123 0, 2124 0, 2125 0, 2126 /*80*/ 0, 2127 0, 2128 0, 2129 0, 2130 MatLoad_SeqBAIJ, 2131 /*85*/ 0, 2132 0, 2133 0, 2134 0, 2135 0, 2136 /*90*/ 0, 2137 0, 2138 0, 2139 0, 2140 0, 2141 /*95*/ 0, 2142 0, 2143 0, 2144 0}; 2145 2146 EXTERN_C_BEGIN 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2149 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2150 { 2151 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2152 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2153 PetscErrorCode ierr; 2154 2155 PetscFunctionBegin; 2156 if (aij->nonew != 1) { 2157 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2158 } 2159 2160 /* allocate space for values if not already there */ 2161 if (!aij->saved_values) { 2162 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2163 } 2164 2165 /* copy values over */ 2166 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 EXTERN_C_END 2170 2171 EXTERN_C_BEGIN 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2174 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2175 { 2176 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2177 PetscErrorCode ierr; 2178 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2179 2180 PetscFunctionBegin; 2181 if (aij->nonew != 1) { 2182 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2183 } 2184 if (!aij->saved_values) { 2185 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2186 } 2187 2188 /* copy values over */ 2189 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 EXTERN_C_END 2193 2194 EXTERN_C_BEGIN 2195 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 2196 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*); 2197 EXTERN_C_END 2198 2199 EXTERN_C_BEGIN 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2203 { 2204 Mat_SeqBAIJ *b; 2205 PetscErrorCode ierr; 2206 PetscInt i,mbs,nbs,bs2,newbs = bs; 2207 PetscTruth flg,skipallocation = PETSC_FALSE; 2208 2209 PetscFunctionBegin; 2210 2211 if (nz == MAT_SKIP_ALLOCATION) { 2212 skipallocation = PETSC_TRUE; 2213 nz = 0; 2214 } 2215 2216 B->preallocated = PETSC_TRUE; 2217 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2218 if (nnz && newbs != bs) { 2219 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2220 } 2221 bs = newbs; 2222 2223 mbs = B->m/bs; 2224 nbs = B->n/bs; 2225 bs2 = bs*bs; 2226 2227 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2228 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2229 } 2230 2231 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2232 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2233 if (nnz) { 2234 for (i=0; i<mbs; i++) { 2235 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2236 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2237 } 2238 } 2239 2240 b = (Mat_SeqBAIJ*)B->data; 2241 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2242 B->ops->solve = MatSolve_SeqBAIJ_Update; 2243 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2244 if (!flg) { 2245 switch (bs) { 2246 case 1: 2247 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2248 B->ops->mult = MatMult_SeqBAIJ_1; 2249 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2250 break; 2251 case 2: 2252 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2253 B->ops->mult = MatMult_SeqBAIJ_2; 2254 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2255 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2256 break; 2257 case 3: 2258 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2259 B->ops->mult = MatMult_SeqBAIJ_3; 2260 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2261 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2262 break; 2263 case 4: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2265 B->ops->mult = MatMult_SeqBAIJ_4; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2267 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2268 break; 2269 case 5: 2270 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2271 B->ops->mult = MatMult_SeqBAIJ_5; 2272 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2273 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2274 break; 2275 case 6: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2277 B->ops->mult = MatMult_SeqBAIJ_6; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2279 break; 2280 case 7: 2281 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2282 B->ops->mult = MatMult_SeqBAIJ_7; 2283 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2284 break; 2285 default: 2286 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2287 B->ops->mult = MatMult_SeqBAIJ_N; 2288 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2289 break; 2290 } 2291 } 2292 B->bs = bs; 2293 b->mbs = mbs; 2294 b->nbs = nbs; 2295 if (!skipallocation) { 2296 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2297 /* b->ilen will count nonzeros in each block row so far. */ 2298 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2299 if (!nnz) { 2300 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2301 else if (nz <= 0) nz = 1; 2302 for (i=0; i<mbs; i++) b->imax[i] = nz; 2303 nz = nz*mbs; 2304 } else { 2305 nz = 0; 2306 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2307 } 2308 2309 /* allocate the matrix space */ 2310 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 2311 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2312 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2313 b->singlemalloc = PETSC_TRUE; 2314 2315 b->i[0] = 0; 2316 for (i=1; i<mbs+1; i++) { 2317 b->i[i] = b->i[i-1] + b->imax[i-1]; 2318 } 2319 } 2320 2321 B->bs = bs; 2322 b->bs2 = bs2; 2323 b->mbs = mbs; 2324 b->nz = 0; 2325 b->maxnz = nz*bs2; 2326 B->info.nz_unneeded = (PetscReal)b->maxnz; 2327 PetscFunctionReturn(0); 2328 } 2329 EXTERN_C_END 2330 2331 /*MC 2332 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2333 block sparse compressed row format. 2334 2335 Options Database Keys: 2336 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2337 2338 Level: beginner 2339 2340 .seealso: MatCreateSeqBAIJ 2341 M*/ 2342 2343 EXTERN_C_BEGIN 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatCreate_SeqBAIJ" 2346 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2347 { 2348 PetscErrorCode ierr; 2349 PetscMPIInt size; 2350 Mat_SeqBAIJ *b; 2351 2352 PetscFunctionBegin; 2353 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2354 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2355 2356 B->m = B->M = PetscMax(B->m,B->M); 2357 B->n = B->N = PetscMax(B->n,B->N); 2358 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2359 B->data = (void*)b; 2360 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2361 B->factor = 0; 2362 B->mapping = 0; 2363 b->row = 0; 2364 b->col = 0; 2365 b->icol = 0; 2366 b->reallocs = 0; 2367 b->saved_values = 0; 2368 #if defined(PETSC_USE_MAT_SINGLE) 2369 b->setvalueslen = 0; 2370 b->setvaluescopy = PETSC_NULL; 2371 #endif 2372 2373 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2374 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2375 2376 b->sorted = PETSC_FALSE; 2377 b->roworiented = PETSC_TRUE; 2378 b->nonew = 0; 2379 b->diag = 0; 2380 b->solve_work = 0; 2381 b->mult_work = 0; 2382 B->spptr = 0; 2383 B->info.nz_unneeded = (PetscReal)b->maxnz; 2384 b->keepzeroedrows = PETSC_FALSE; 2385 b->xtoy = 0; 2386 b->XtoY = 0; 2387 b->compressedrow.use = PETSC_FALSE; 2388 b->compressedrow.nrows = 0; 2389 b->compressedrow.i = PETSC_NULL; 2390 b->compressedrow.rindex = PETSC_NULL; 2391 b->compressedrow.checked = PETSC_FALSE; 2392 B->same_nonzero = PETSC_FALSE; 2393 2394 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2395 "MatInvertBlockDiagonal_SeqBAIJ", 2396 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2397 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2398 "MatStoreValues_SeqBAIJ", 2399 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2401 "MatRetrieveValues_SeqBAIJ", 2402 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2404 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2405 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2407 "MatConvert_SeqBAIJ_SeqAIJ", 2408 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2409 #if !defined(PETSC_USE_64BIT_INT) 2410 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2411 "MatConvert_SeqBAIJ_SeqSBAIJ", 2412 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2413 #endif 2414 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2415 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2416 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 EXTERN_C_END 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2423 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2424 { 2425 Mat C; 2426 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2427 PetscErrorCode ierr; 2428 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2429 2430 PetscFunctionBegin; 2431 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2432 2433 *B = 0; 2434 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2435 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2436 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2437 c = (Mat_SeqBAIJ*)C->data; 2438 2439 C->M = A->M; 2440 C->N = A->N; 2441 C->bs = A->bs; 2442 c->bs2 = a->bs2; 2443 c->mbs = a->mbs; 2444 c->nbs = a->nbs; 2445 2446 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2447 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2448 for (i=0; i<mbs; i++) { 2449 c->imax[i] = a->imax[i]; 2450 c->ilen[i] = a->ilen[i]; 2451 } 2452 2453 /* allocate the matrix space */ 2454 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2455 c->singlemalloc = PETSC_TRUE; 2456 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2457 if (mbs > 0) { 2458 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2459 if (cpvalues == MAT_COPY_VALUES) { 2460 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2461 } else { 2462 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2463 } 2464 } 2465 c->sorted = a->sorted; 2466 c->roworiented = a->roworiented; 2467 c->nonew = a->nonew; 2468 2469 if (a->diag) { 2470 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2471 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2472 for (i=0; i<mbs; i++) { 2473 c->diag[i] = a->diag[i]; 2474 } 2475 } else c->diag = 0; 2476 c->nz = a->nz; 2477 c->maxnz = a->maxnz; 2478 c->solve_work = 0; 2479 c->mult_work = 0; 2480 C->preallocated = PETSC_TRUE; 2481 C->assembled = PETSC_TRUE; 2482 2483 c->compressedrow.use = a->compressedrow.use; 2484 c->compressedrow.nrows = a->compressedrow.nrows; 2485 c->compressedrow.checked = a->compressedrow.checked; 2486 if ( a->compressedrow.checked && a->compressedrow.use){ 2487 i = a->compressedrow.nrows; 2488 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2489 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2490 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2491 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2492 } else { 2493 c->compressedrow.use = PETSC_FALSE; 2494 c->compressedrow.i = PETSC_NULL; 2495 c->compressedrow.rindex = PETSC_NULL; 2496 } 2497 C->same_nonzero = A->same_nonzero; 2498 *B = C; 2499 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatLoad_SeqBAIJ" 2505 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2506 { 2507 Mat_SeqBAIJ *a; 2508 Mat B; 2509 PetscErrorCode ierr; 2510 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2511 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2512 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2513 PetscInt *masked,nmask,tmp,bs2,ishift; 2514 PetscMPIInt size; 2515 int fd; 2516 PetscScalar *aa; 2517 MPI_Comm comm = ((PetscObject)viewer)->comm; 2518 2519 PetscFunctionBegin; 2520 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2521 bs2 = bs*bs; 2522 2523 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2524 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2525 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2526 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2527 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2528 M = header[1]; N = header[2]; nz = header[3]; 2529 2530 if (header[3] < 0) { 2531 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2532 } 2533 2534 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2535 2536 /* 2537 This code adds extra rows to make sure the number of rows is 2538 divisible by the blocksize 2539 */ 2540 mbs = M/bs; 2541 extra_rows = bs - M + bs*(mbs); 2542 if (extra_rows == bs) extra_rows = 0; 2543 else mbs++; 2544 if (extra_rows) { 2545 ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2546 } 2547 2548 /* read in row lengths */ 2549 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2550 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2551 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2552 2553 /* read in column indices */ 2554 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2555 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2556 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2557 2558 /* loop over row lengths determining block row lengths */ 2559 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2560 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2561 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2562 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2563 masked = mask + mbs; 2564 rowcount = 0; nzcount = 0; 2565 for (i=0; i<mbs; i++) { 2566 nmask = 0; 2567 for (j=0; j<bs; j++) { 2568 kmax = rowlengths[rowcount]; 2569 for (k=0; k<kmax; k++) { 2570 tmp = jj[nzcount++]/bs; 2571 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2572 } 2573 rowcount++; 2574 } 2575 browlengths[i] += nmask; 2576 /* zero out the mask elements we set */ 2577 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2578 } 2579 2580 /* create our matrix */ 2581 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2582 ierr = MatSetType(B,type);CHKERRQ(ierr); 2583 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2584 a = (Mat_SeqBAIJ*)B->data; 2585 2586 /* set matrix "i" values */ 2587 a->i[0] = 0; 2588 for (i=1; i<= mbs; i++) { 2589 a->i[i] = a->i[i-1] + browlengths[i-1]; 2590 a->ilen[i-1] = browlengths[i-1]; 2591 } 2592 a->nz = 0; 2593 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2594 2595 /* read in nonzero values */ 2596 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2597 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2598 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2599 2600 /* set "a" and "j" values into matrix */ 2601 nzcount = 0; jcount = 0; 2602 for (i=0; i<mbs; i++) { 2603 nzcountb = nzcount; 2604 nmask = 0; 2605 for (j=0; j<bs; j++) { 2606 kmax = rowlengths[i*bs+j]; 2607 for (k=0; k<kmax; k++) { 2608 tmp = jj[nzcount++]/bs; 2609 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2610 } 2611 } 2612 /* sort the masked values */ 2613 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2614 2615 /* set "j" values into matrix */ 2616 maskcount = 1; 2617 for (j=0; j<nmask; j++) { 2618 a->j[jcount++] = masked[j]; 2619 mask[masked[j]] = maskcount++; 2620 } 2621 /* set "a" values into matrix */ 2622 ishift = bs2*a->i[i]; 2623 for (j=0; j<bs; j++) { 2624 kmax = rowlengths[i*bs+j]; 2625 for (k=0; k<kmax; k++) { 2626 tmp = jj[nzcountb]/bs ; 2627 block = mask[tmp] - 1; 2628 point = jj[nzcountb] - bs*tmp; 2629 idx = ishift + bs2*block + j + bs*point; 2630 a->a[idx] = (MatScalar)aa[nzcountb++]; 2631 } 2632 } 2633 /* zero out the mask elements we set */ 2634 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2635 } 2636 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2637 2638 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2639 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2640 ierr = PetscFree(aa);CHKERRQ(ierr); 2641 ierr = PetscFree(jj);CHKERRQ(ierr); 2642 ierr = PetscFree(mask);CHKERRQ(ierr); 2643 2644 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2645 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2646 ierr = MatView_Private(B);CHKERRQ(ierr); 2647 2648 *A = B; 2649 PetscFunctionReturn(0); 2650 } 2651 2652 #undef __FUNCT__ 2653 #define __FUNCT__ "MatCreateSeqBAIJ" 2654 /*@C 2655 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2656 compressed row) format. For good matrix assembly performance the 2657 user should preallocate the matrix storage by setting the parameter nz 2658 (or the array nnz). By setting these parameters accurately, performance 2659 during matrix assembly can be increased by more than a factor of 50. 2660 2661 Collective on MPI_Comm 2662 2663 Input Parameters: 2664 + comm - MPI communicator, set to PETSC_COMM_SELF 2665 . bs - size of block 2666 . m - number of rows 2667 . n - number of columns 2668 . nz - number of nonzero blocks per block row (same for all rows) 2669 - nnz - array containing the number of nonzero blocks in the various block rows 2670 (possibly different for each block row) or PETSC_NULL 2671 2672 Output Parameter: 2673 . A - the matrix 2674 2675 Options Database Keys: 2676 . -mat_no_unroll - uses code that does not unroll the loops in the 2677 block calculations (much slower) 2678 . -mat_block_size - size of the blocks to use 2679 2680 Level: intermediate 2681 2682 Notes: 2683 The number of rows and columns must be divisible by blocksize. 2684 2685 If the nnz parameter is given then the nz parameter is ignored 2686 2687 A nonzero block is any block that as 1 or more nonzeros in it 2688 2689 The block AIJ format is fully compatible with standard Fortran 77 2690 storage. That is, the stored row and column indices can begin at 2691 either one (as in Fortran) or zero. See the users' manual for details. 2692 2693 Specify the preallocated storage with either nz or nnz (not both). 2694 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2695 allocation. For additional details, see the users manual chapter on 2696 matrices. 2697 2698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2699 @*/ 2700 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2701 { 2702 PetscErrorCode ierr; 2703 2704 PetscFunctionBegin; 2705 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2706 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2707 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2708 PetscFunctionReturn(0); 2709 } 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2713 /*@C 2714 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2715 per row in the matrix. For good matrix assembly performance the 2716 user should preallocate the matrix storage by setting the parameter nz 2717 (or the array nnz). By setting these parameters accurately, performance 2718 during matrix assembly can be increased by more than a factor of 50. 2719 2720 Collective on MPI_Comm 2721 2722 Input Parameters: 2723 + A - the matrix 2724 . bs - size of block 2725 . nz - number of block nonzeros per block row (same for all rows) 2726 - nnz - array containing the number of block nonzeros in the various block rows 2727 (possibly different for each block row) or PETSC_NULL 2728 2729 Options Database Keys: 2730 . -mat_no_unroll - uses code that does not unroll the loops in the 2731 block calculations (much slower) 2732 . -mat_block_size - size of the blocks to use 2733 2734 Level: intermediate 2735 2736 Notes: 2737 If the nnz parameter is given then the nz parameter is ignored 2738 2739 The block AIJ format is fully compatible with standard Fortran 77 2740 storage. That is, the stored row and column indices can begin at 2741 either one (as in Fortran) or zero. See the users' manual for details. 2742 2743 Specify the preallocated storage with either nz or nnz (not both). 2744 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2745 allocation. For additional details, see the users manual chapter on 2746 matrices. 2747 2748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2749 @*/ 2750 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2751 { 2752 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2753 2754 PetscFunctionBegin; 2755 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2756 if (f) { 2757 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2758 } 2759 PetscFunctionReturn(0); 2760 } 2761 2762