1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 785 PetscInt *tia, *tja; 786 787 PetscFunctionBegin; 788 *nn = n; 789 if (!ia) PetscFunctionReturn(0); 790 if (symmetric) { 791 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 792 } else { 793 tia = a->i; tja = a->j; 794 } 795 796 if (!blockcompressed) { 797 /* malloc & create the natural set of indices */ 798 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 799 for (i=0; i<n+1; i++) { 800 for (j=0; j<bs; j++) { 801 *ia[i*bs+j] = tia[i]*bs+j; 802 } 803 } 804 for (i=0; i<nz; i++) { 805 for (j=0; j<bs; j++) { 806 *ja[i*bs+j] = tia[i]*bs+j; 807 } 808 } 809 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 810 ierr = PetscFree(tia);CHKERRQ(ierr); 811 ierr = PetscFree(tja);CHKERRQ(ierr); 812 } 813 } else { 814 *ia = tia; 815 *ja = tja; 816 } 817 if (oshift == 1) { 818 for (i=0; i<nz; i++) (*ja)[i]++; 819 for (i=0; i<n+1; i++) (*ia)[i]++; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 826 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 829 PetscErrorCode ierr; 830 PetscInt i,n = a->mbs,nz = a->i[n]; 831 832 PetscFunctionBegin; 833 if (!ia) PetscFunctionReturn(0); 834 if (!blockcompressed) { 835 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 836 }else if (symmetric) { 837 ierr = PetscFree(*ia);CHKERRQ(ierr); 838 ierr = PetscFree(*ja);CHKERRQ(ierr); 839 } else if (oshift == 1) { /* blockcompressed */ 840 for (i=0; i<nz; i++) a->j[i]--; 841 for (i=0; i<n+1; i++) a->i[i]--; 842 } 843 PetscFunctionReturn(0); 844 } 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatDestroy_SeqBAIJ" 848 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 849 { 850 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 851 PetscErrorCode ierr; 852 853 PetscFunctionBegin; 854 #if defined(PETSC_USE_LOG) 855 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 856 #endif 857 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 858 if (a->row) { 859 ierr = ISDestroy(a->row);CHKERRQ(ierr); 860 } 861 if (a->col) { 862 ierr = ISDestroy(a->col);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(a->diag);CHKERRQ(ierr); 865 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 866 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 867 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 868 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 869 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 870 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 871 #if defined(PETSC_USE_MAT_SINGLE) 872 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 873 #endif 874 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 875 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 876 877 ierr = PetscFree(a);CHKERRQ(ierr); 878 879 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatSetOption_SeqBAIJ" 892 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 893 { 894 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 switch (op) { 899 case MAT_ROW_ORIENTED: 900 a->roworiented = flg; 901 break; 902 case MAT_COLUMNS_SORTED: 903 a->sorted = flg; 904 break; 905 case MAT_KEEP_ZEROED_ROWS: 906 a->keepzeroedrows = flg; 907 break; 908 case MAT_NO_NEW_NONZERO_LOCATIONS: 909 a->nonew = (flg ? 1 : 0); 910 break; 911 case MAT_NEW_NONZERO_LOCATION_ERR: 912 a->nonew = (flg ? -1 : 0); 913 break; 914 case MAT_NEW_NONZERO_ALLOCATION_ERR: 915 a->nonew = (flg ? -2 : 0); 916 break; 917 case MAT_ROWS_SORTED: 918 case MAT_NEW_DIAGONALS: 919 case MAT_IGNORE_OFF_PROC_ENTRIES: 920 case MAT_USE_HASH_TABLE: 921 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 922 break; 923 case MAT_SYMMETRIC: 924 case MAT_STRUCTURALLY_SYMMETRIC: 925 case MAT_HERMITIAN: 926 case MAT_SYMMETRY_ETERNAL: 927 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 928 break; 929 default: 930 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 931 } 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatGetRow_SeqBAIJ" 937 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 938 { 939 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 940 PetscErrorCode ierr; 941 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 942 MatScalar *aa,*aa_i; 943 PetscScalar *v_i; 944 945 PetscFunctionBegin; 946 bs = A->rmap.bs; 947 ai = a->i; 948 aj = a->j; 949 aa = a->a; 950 bs2 = a->bs2; 951 952 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 953 954 bn = row/bs; /* Block number */ 955 bp = row % bs; /* Block Position */ 956 M = ai[bn+1] - ai[bn]; 957 *nz = bs*M; 958 959 if (v) { 960 *v = 0; 961 if (*nz) { 962 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 963 for (i=0; i<M; i++) { /* for each block in the block row */ 964 v_i = *v + i*bs; 965 aa_i = aa + bs2*(ai[bn] + i); 966 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 967 } 968 } 969 } 970 971 if (idx) { 972 *idx = 0; 973 if (*nz) { 974 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 975 for (i=0; i<M; i++) { /* for each block in the block row */ 976 idx_i = *idx + i*bs; 977 itmp = bs*aj[ai[bn] + i]; 978 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 979 } 980 } 981 } 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 987 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 988 { 989 PetscErrorCode ierr; 990 991 PetscFunctionBegin; 992 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 993 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 994 PetscFunctionReturn(0); 995 } 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatTranspose_SeqBAIJ" 999 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1000 { 1001 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1002 Mat C; 1003 PetscErrorCode ierr; 1004 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1005 PetscInt *rows,*cols,bs2=a->bs2; 1006 PetscScalar *array; 1007 1008 PetscFunctionBegin; 1009 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1010 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1011 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1012 1013 #if defined(PETSC_USE_MAT_SINGLE) 1014 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1015 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1016 #else 1017 array = a->a; 1018 #endif 1019 1020 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1021 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1022 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1023 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1024 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1025 ierr = PetscFree(col);CHKERRQ(ierr); 1026 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1027 cols = rows + bs; 1028 for (i=0; i<mbs; i++) { 1029 cols[0] = i*bs; 1030 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1031 len = ai[i+1] - ai[i]; 1032 for (j=0; j<len; j++) { 1033 rows[0] = (*aj++)*bs; 1034 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1035 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1036 array += bs2; 1037 } 1038 } 1039 ierr = PetscFree(rows);CHKERRQ(ierr); 1040 #if defined(PETSC_USE_MAT_SINGLE) 1041 ierr = PetscFree(array); 1042 #endif 1043 1044 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1045 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1046 1047 if (B) { 1048 *B = C; 1049 } else { 1050 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1051 } 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1057 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1058 { 1059 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1060 PetscErrorCode ierr; 1061 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1062 int fd; 1063 PetscScalar *aa; 1064 FILE *file; 1065 1066 PetscFunctionBegin; 1067 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1068 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1069 col_lens[0] = MAT_FILE_COOKIE; 1070 1071 col_lens[1] = A->rmap.N; 1072 col_lens[2] = A->cmap.n; 1073 col_lens[3] = a->nz*bs2; 1074 1075 /* store lengths of each row and write (including header) to file */ 1076 count = 0; 1077 for (i=0; i<a->mbs; i++) { 1078 for (j=0; j<bs; j++) { 1079 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1080 } 1081 } 1082 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1083 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1084 1085 /* store column indices (zero start index) */ 1086 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1087 count = 0; 1088 for (i=0; i<a->mbs; i++) { 1089 for (j=0; j<bs; j++) { 1090 for (k=a->i[i]; k<a->i[i+1]; k++) { 1091 for (l=0; l<bs; l++) { 1092 jj[count++] = bs*a->j[k] + l; 1093 } 1094 } 1095 } 1096 } 1097 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1098 ierr = PetscFree(jj);CHKERRQ(ierr); 1099 1100 /* store nonzero values */ 1101 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1102 count = 0; 1103 for (i=0; i<a->mbs; i++) { 1104 for (j=0; j<bs; j++) { 1105 for (k=a->i[i]; k<a->i[i+1]; k++) { 1106 for (l=0; l<bs; l++) { 1107 aa[count++] = a->a[bs2*k + l*bs + j]; 1108 } 1109 } 1110 } 1111 } 1112 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1113 ierr = PetscFree(aa);CHKERRQ(ierr); 1114 1115 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1116 if (file) { 1117 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1124 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1125 { 1126 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1129 PetscViewerFormat format; 1130 1131 PetscFunctionBegin; 1132 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1133 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1134 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1135 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1136 Mat aij; 1137 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1138 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1139 ierr = MatDestroy(aij);CHKERRQ(ierr); 1140 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1141 PetscFunctionReturn(0); 1142 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1143 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1144 for (i=0; i<a->mbs; i++) { 1145 for (j=0; j<bs; j++) { 1146 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1147 for (k=a->i[i]; k<a->i[i+1]; k++) { 1148 for (l=0; l<bs; l++) { 1149 #if defined(PETSC_USE_COMPLEX) 1150 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1151 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1152 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1153 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1154 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1155 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1156 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1157 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1158 } 1159 #else 1160 if (a->a[bs2*k + l*bs + j] != 0.0) { 1161 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1162 } 1163 #endif 1164 } 1165 } 1166 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1167 } 1168 } 1169 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1170 } else { 1171 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1172 for (i=0; i<a->mbs; i++) { 1173 for (j=0; j<bs; j++) { 1174 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1175 for (k=a->i[i]; k<a->i[i+1]; k++) { 1176 for (l=0; l<bs; l++) { 1177 #if defined(PETSC_USE_COMPLEX) 1178 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1179 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1180 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1181 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1182 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1183 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1184 } else { 1185 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1186 } 1187 #else 1188 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1189 #endif 1190 } 1191 } 1192 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1193 } 1194 } 1195 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1196 } 1197 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1203 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1204 { 1205 Mat A = (Mat) Aa; 1206 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1209 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1210 MatScalar *aa; 1211 PetscViewer viewer; 1212 1213 PetscFunctionBegin; 1214 1215 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1216 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1217 1218 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1219 1220 /* loop over matrix elements drawing boxes */ 1221 color = PETSC_DRAW_BLUE; 1222 for (i=0,row=0; i<mbs; i++,row+=bs) { 1223 for (j=a->i[i]; j<a->i[i+1]; j++) { 1224 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1225 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1226 aa = a->a + j*bs2; 1227 for (k=0; k<bs; k++) { 1228 for (l=0; l<bs; l++) { 1229 if (PetscRealPart(*aa++) >= 0.) continue; 1230 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1231 } 1232 } 1233 } 1234 } 1235 color = PETSC_DRAW_CYAN; 1236 for (i=0,row=0; i<mbs; i++,row+=bs) { 1237 for (j=a->i[i]; j<a->i[i+1]; j++) { 1238 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1239 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1240 aa = a->a + j*bs2; 1241 for (k=0; k<bs; k++) { 1242 for (l=0; l<bs; l++) { 1243 if (PetscRealPart(*aa++) != 0.) continue; 1244 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1245 } 1246 } 1247 } 1248 } 1249 1250 color = PETSC_DRAW_RED; 1251 for (i=0,row=0; i<mbs; i++,row+=bs) { 1252 for (j=a->i[i]; j<a->i[i+1]; j++) { 1253 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1255 aa = a->a + j*bs2; 1256 for (k=0; k<bs; k++) { 1257 for (l=0; l<bs; l++) { 1258 if (PetscRealPart(*aa++) <= 0.) continue; 1259 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1260 } 1261 } 1262 } 1263 } 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1269 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1270 { 1271 PetscErrorCode ierr; 1272 PetscReal xl,yl,xr,yr,w,h; 1273 PetscDraw draw; 1274 PetscTruth isnull; 1275 1276 PetscFunctionBegin; 1277 1278 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1279 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1280 1281 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1282 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1283 xr += w; yr += h; xl = -w; yl = -h; 1284 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1285 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1286 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatView_SeqBAIJ" 1292 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1293 { 1294 PetscErrorCode ierr; 1295 PetscTruth iascii,isbinary,isdraw; 1296 1297 PetscFunctionBegin; 1298 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1299 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1300 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1301 if (iascii){ 1302 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1303 } else if (isbinary) { 1304 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1305 } else if (isdraw) { 1306 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1307 } else { 1308 Mat B; 1309 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1310 ierr = MatView(B,viewer);CHKERRQ(ierr); 1311 ierr = MatDestroy(B);CHKERRQ(ierr); 1312 } 1313 PetscFunctionReturn(0); 1314 } 1315 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1319 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1320 { 1321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1322 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1323 PetscInt *ai = a->i,*ailen = a->ilen; 1324 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1325 MatScalar *ap,*aa = a->a; 1326 1327 PetscFunctionBegin; 1328 for (k=0; k<m; k++) { /* loop over rows */ 1329 row = im[k]; brow = row/bs; 1330 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1331 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1332 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1333 nrow = ailen[brow]; 1334 for (l=0; l<n; l++) { /* loop over columns */ 1335 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1336 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1337 col = in[l] ; 1338 bcol = col/bs; 1339 cidx = col%bs; 1340 ridx = row%bs; 1341 high = nrow; 1342 low = 0; /* assume unsorted */ 1343 while (high-low > 5) { 1344 t = (low+high)/2; 1345 if (rp[t] > bcol) high = t; 1346 else low = t; 1347 } 1348 for (i=low; i<high; i++) { 1349 if (rp[i] > bcol) break; 1350 if (rp[i] == bcol) { 1351 *v++ = ap[bs2*i+bs*cidx+ridx]; 1352 goto finished; 1353 } 1354 } 1355 *v++ = 0.0; 1356 finished:; 1357 } 1358 } 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #if defined(PETSC_USE_MAT_SINGLE) 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1365 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1366 { 1367 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1368 PetscErrorCode ierr; 1369 PetscInt i,N = m*n*b->bs2; 1370 MatScalar *vsingle; 1371 1372 PetscFunctionBegin; 1373 if (N > b->setvalueslen) { 1374 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1375 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1376 b->setvalueslen = N; 1377 } 1378 vsingle = b->setvaluescopy; 1379 for (i=0; i<N; i++) { 1380 vsingle[i] = v[i]; 1381 } 1382 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 #endif 1386 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1390 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1391 { 1392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1393 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1394 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1395 PetscErrorCode ierr; 1396 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1397 PetscTruth roworiented=a->roworiented; 1398 const MatScalar *value = v; 1399 MatScalar *ap,*aa = a->a,*bap; 1400 1401 PetscFunctionBegin; 1402 if (roworiented) { 1403 stepval = (n-1)*bs; 1404 } else { 1405 stepval = (m-1)*bs; 1406 } 1407 for (k=0; k<m; k++) { /* loop over added rows */ 1408 row = im[k]; 1409 if (row < 0) continue; 1410 #if defined(PETSC_USE_DEBUG) 1411 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1412 #endif 1413 rp = aj + ai[row]; 1414 ap = aa + bs2*ai[row]; 1415 rmax = imax[row]; 1416 nrow = ailen[row]; 1417 low = 0; 1418 high = nrow; 1419 for (l=0; l<n; l++) { /* loop over added columns */ 1420 if (in[l] < 0) continue; 1421 #if defined(PETSC_USE_DEBUG) 1422 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1423 #endif 1424 col = in[l]; 1425 if (roworiented) { 1426 value = v + k*(stepval+bs)*bs + l*bs; 1427 } else { 1428 value = v + l*(stepval+bs)*bs + k*bs; 1429 } 1430 if (col <= lastcol) low = 0; else high = nrow; 1431 lastcol = col; 1432 while (high-low > 7) { 1433 t = (low+high)/2; 1434 if (rp[t] > col) high = t; 1435 else low = t; 1436 } 1437 for (i=low; i<high; i++) { 1438 if (rp[i] > col) break; 1439 if (rp[i] == col) { 1440 bap = ap + bs2*i; 1441 if (roworiented) { 1442 if (is == ADD_VALUES) { 1443 for (ii=0; ii<bs; ii++,value+=stepval) { 1444 for (jj=ii; jj<bs2; jj+=bs) { 1445 bap[jj] += *value++; 1446 } 1447 } 1448 } else { 1449 for (ii=0; ii<bs; ii++,value+=stepval) { 1450 for (jj=ii; jj<bs2; jj+=bs) { 1451 bap[jj] = *value++; 1452 } 1453 } 1454 } 1455 } else { 1456 if (is == ADD_VALUES) { 1457 for (ii=0; ii<bs; ii++,value+=stepval) { 1458 for (jj=0; jj<bs; jj++) { 1459 *bap++ += *value++; 1460 } 1461 } 1462 } else { 1463 for (ii=0; ii<bs; ii++,value+=stepval) { 1464 for (jj=0; jj<bs; jj++) { 1465 *bap++ = *value++; 1466 } 1467 } 1468 } 1469 } 1470 goto noinsert2; 1471 } 1472 } 1473 if (nonew == 1) goto noinsert2; 1474 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1475 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1476 N = nrow++ - 1; high++; 1477 /* shift up all the later entries in this row */ 1478 for (ii=N; ii>=i; ii--) { 1479 rp[ii+1] = rp[ii]; 1480 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1481 } 1482 if (N >= i) { 1483 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1484 } 1485 rp[i] = col; 1486 bap = ap + bs2*i; 1487 if (roworiented) { 1488 for (ii=0; ii<bs; ii++,value+=stepval) { 1489 for (jj=ii; jj<bs2; jj+=bs) { 1490 bap[jj] = *value++; 1491 } 1492 } 1493 } else { 1494 for (ii=0; ii<bs; ii++,value+=stepval) { 1495 for (jj=0; jj<bs; jj++) { 1496 *bap++ = *value++; 1497 } 1498 } 1499 } 1500 noinsert2:; 1501 low = i; 1502 } 1503 ailen[row] = nrow; 1504 } 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1510 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1511 { 1512 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1513 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1514 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1515 PetscErrorCode ierr; 1516 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1517 MatScalar *aa = a->a,*ap; 1518 PetscReal ratio=0.6; 1519 1520 PetscFunctionBegin; 1521 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1522 1523 if (m) rmax = ailen[0]; 1524 for (i=1; i<mbs; i++) { 1525 /* move each row back by the amount of empty slots (fshift) before it*/ 1526 fshift += imax[i-1] - ailen[i-1]; 1527 rmax = PetscMax(rmax,ailen[i]); 1528 if (fshift) { 1529 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1530 N = ailen[i]; 1531 for (j=0; j<N; j++) { 1532 ip[j-fshift] = ip[j]; 1533 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1534 } 1535 } 1536 ai[i] = ai[i-1] + ailen[i-1]; 1537 } 1538 if (mbs) { 1539 fshift += imax[mbs-1] - ailen[mbs-1]; 1540 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1541 } 1542 /* reset ilen and imax for each row */ 1543 for (i=0; i<mbs; i++) { 1544 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1545 } 1546 a->nz = ai[mbs]; 1547 1548 /* diagonals may have moved, so kill the diagonal pointers */ 1549 a->idiagvalid = PETSC_FALSE; 1550 if (fshift && a->diag) { 1551 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1552 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1553 a->diag = 0; 1554 } 1555 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1556 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1557 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1558 a->reallocs = 0; 1559 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1560 1561 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1562 if (a->compressedrow.use){ 1563 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1564 } 1565 1566 A->same_nonzero = PETSC_TRUE; 1567 PetscFunctionReturn(0); 1568 } 1569 1570 /* 1571 This function returns an array of flags which indicate the locations of contiguous 1572 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1573 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1574 Assume: sizes should be long enough to hold all the values. 1575 */ 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1578 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1579 { 1580 PetscInt i,j,k,row; 1581 PetscTruth flg; 1582 1583 PetscFunctionBegin; 1584 for (i=0,j=0; i<n; j++) { 1585 row = idx[i]; 1586 if (row%bs!=0) { /* Not the begining of a block */ 1587 sizes[j] = 1; 1588 i++; 1589 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1590 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1591 i++; 1592 } else { /* Begining of the block, so check if the complete block exists */ 1593 flg = PETSC_TRUE; 1594 for (k=1; k<bs; k++) { 1595 if (row+k != idx[i+k]) { /* break in the block */ 1596 flg = PETSC_FALSE; 1597 break; 1598 } 1599 } 1600 if (flg) { /* No break in the bs */ 1601 sizes[j] = bs; 1602 i+= bs; 1603 } else { 1604 sizes[j] = 1; 1605 i++; 1606 } 1607 } 1608 } 1609 *bs_max = j; 1610 PetscFunctionReturn(0); 1611 } 1612 1613 #undef __FUNCT__ 1614 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1615 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1616 { 1617 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1618 PetscErrorCode ierr; 1619 PetscInt i,j,k,count,*rows; 1620 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1621 PetscScalar zero = 0.0; 1622 MatScalar *aa; 1623 1624 PetscFunctionBegin; 1625 /* Make a copy of the IS and sort it */ 1626 /* allocate memory for rows,sizes */ 1627 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1628 sizes = rows + is_n; 1629 1630 /* copy IS values to rows, and sort them */ 1631 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1632 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1633 if (baij->keepzeroedrows) { 1634 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1635 bs_max = is_n; 1636 A->same_nonzero = PETSC_TRUE; 1637 } else { 1638 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1639 A->same_nonzero = PETSC_FALSE; 1640 } 1641 1642 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1643 row = rows[j]; 1644 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1645 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1646 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1647 if (sizes[i] == bs && !baij->keepzeroedrows) { 1648 if (diag != 0.0) { 1649 if (baij->ilen[row/bs] > 0) { 1650 baij->ilen[row/bs] = 1; 1651 baij->j[baij->i[row/bs]] = row/bs; 1652 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1653 } 1654 /* Now insert all the diagonal values for this bs */ 1655 for (k=0; k<bs; k++) { 1656 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1657 } 1658 } else { /* (diag == 0.0) */ 1659 baij->ilen[row/bs] = 0; 1660 } /* end (diag == 0.0) */ 1661 } else { /* (sizes[i] != bs) */ 1662 #if defined (PETSC_USE_DEBUG) 1663 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1664 #endif 1665 for (k=0; k<count; k++) { 1666 aa[0] = zero; 1667 aa += bs; 1668 } 1669 if (diag != 0.0) { 1670 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1671 } 1672 } 1673 } 1674 1675 ierr = PetscFree(rows);CHKERRQ(ierr); 1676 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1682 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1683 { 1684 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1685 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1686 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1687 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1688 PetscErrorCode ierr; 1689 PetscInt ridx,cidx,bs2=a->bs2; 1690 PetscTruth roworiented=a->roworiented; 1691 MatScalar *ap,value,*aa=a->a,*bap; 1692 1693 PetscFunctionBegin; 1694 for (k=0; k<m; k++) { /* loop over added rows */ 1695 row = im[k]; 1696 brow = row/bs; 1697 if (row < 0) continue; 1698 #if defined(PETSC_USE_DEBUG) 1699 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1700 #endif 1701 rp = aj + ai[brow]; 1702 ap = aa + bs2*ai[brow]; 1703 rmax = imax[brow]; 1704 nrow = ailen[brow]; 1705 low = 0; 1706 high = nrow; 1707 for (l=0; l<n; l++) { /* loop over added columns */ 1708 if (in[l] < 0) continue; 1709 #if defined(PETSC_USE_DEBUG) 1710 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1711 #endif 1712 col = in[l]; bcol = col/bs; 1713 ridx = row % bs; cidx = col % bs; 1714 if (roworiented) { 1715 value = v[l + k*n]; 1716 } else { 1717 value = v[k + l*m]; 1718 } 1719 if (col <= lastcol) low = 0; else high = nrow; 1720 lastcol = col; 1721 while (high-low > 7) { 1722 t = (low+high)/2; 1723 if (rp[t] > bcol) high = t; 1724 else low = t; 1725 } 1726 for (i=low; i<high; i++) { 1727 if (rp[i] > bcol) break; 1728 if (rp[i] == bcol) { 1729 bap = ap + bs2*i + bs*cidx + ridx; 1730 if (is == ADD_VALUES) *bap += value; 1731 else *bap = value; 1732 goto noinsert1; 1733 } 1734 } 1735 if (nonew == 1) goto noinsert1; 1736 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1737 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1738 N = nrow++ - 1; high++; 1739 /* shift up all the later entries in this row */ 1740 for (ii=N; ii>=i; ii--) { 1741 rp[ii+1] = rp[ii]; 1742 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1743 } 1744 if (N>=i) { 1745 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1746 } 1747 rp[i] = bcol; 1748 ap[bs2*i + bs*cidx + ridx] = value; 1749 a->nz++; 1750 noinsert1:; 1751 low = i; 1752 } 1753 ailen[brow] = nrow; 1754 } 1755 A->same_nonzero = PETSC_FALSE; 1756 PetscFunctionReturn(0); 1757 } 1758 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1762 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1763 { 1764 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1765 Mat outA; 1766 PetscErrorCode ierr; 1767 PetscTruth row_identity,col_identity; 1768 1769 PetscFunctionBegin; 1770 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1771 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1772 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1773 if (!row_identity || !col_identity) { 1774 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1775 } 1776 1777 outA = inA; 1778 inA->factor = FACTOR_LU; 1779 1780 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1781 1782 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1783 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1784 a->row = row; 1785 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1786 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1787 a->col = col; 1788 1789 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1790 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1791 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1792 1793 /* 1794 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1795 for ILU(0) factorization with natural ordering 1796 */ 1797 if (inA->rmap.bs < 8) { 1798 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1799 } else { 1800 if (!a->solve_work) { 1801 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1802 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1803 } 1804 } 1805 1806 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1807 1808 PetscFunctionReturn(0); 1809 } 1810 1811 EXTERN_C_BEGIN 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1814 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1815 { 1816 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1817 PetscInt i,nz,nbs; 1818 1819 PetscFunctionBegin; 1820 nz = baij->maxnz/baij->bs2; 1821 nbs = baij->nbs; 1822 for (i=0; i<nz; i++) { 1823 baij->j[i] = indices[i]; 1824 } 1825 baij->nz = nz; 1826 for (i=0; i<nbs; i++) { 1827 baij->ilen[i] = baij->imax[i]; 1828 } 1829 1830 PetscFunctionReturn(0); 1831 } 1832 EXTERN_C_END 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1836 /*@ 1837 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1838 in the matrix. 1839 1840 Input Parameters: 1841 + mat - the SeqBAIJ matrix 1842 - indices - the column indices 1843 1844 Level: advanced 1845 1846 Notes: 1847 This can be called if you have precomputed the nonzero structure of the 1848 matrix and want to provide it to the matrix object to improve the performance 1849 of the MatSetValues() operation. 1850 1851 You MUST have set the correct numbers of nonzeros per row in the call to 1852 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1853 1854 MUST be called before any calls to MatSetValues(); 1855 1856 @*/ 1857 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1858 { 1859 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1860 1861 PetscFunctionBegin; 1862 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1863 PetscValidPointer(indices,2); 1864 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1865 if (f) { 1866 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1867 } else { 1868 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1869 } 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1875 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1876 { 1877 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1878 PetscErrorCode ierr; 1879 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1880 PetscReal atmp; 1881 PetscScalar *x,zero = 0.0; 1882 MatScalar *aa; 1883 PetscInt ncols,brow,krow,kcol; 1884 1885 PetscFunctionBegin; 1886 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1887 bs = A->rmap.bs; 1888 aa = a->a; 1889 ai = a->i; 1890 aj = a->j; 1891 mbs = a->mbs; 1892 1893 ierr = VecSet(v,zero);CHKERRQ(ierr); 1894 if (idx) { 1895 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1896 } 1897 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1898 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1899 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1900 for (i=0; i<mbs; i++) { 1901 ncols = ai[1] - ai[0]; ai++; 1902 brow = bs*i; 1903 for (j=0; j<ncols; j++){ 1904 for (kcol=0; kcol<bs; kcol++){ 1905 for (krow=0; krow<bs; krow++){ 1906 atmp = PetscAbsScalar(*aa);aa++; 1907 row = brow + krow; /* row index */ 1908 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1909 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1910 } 1911 } 1912 aj++; 1913 } 1914 } 1915 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatCopy_SeqBAIJ" 1921 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1922 { 1923 PetscErrorCode ierr; 1924 1925 PetscFunctionBegin; 1926 /* If the two matrices have the same copy implementation, use fast copy. */ 1927 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1928 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1929 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1930 1931 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1932 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1933 } 1934 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1935 } else { 1936 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1937 } 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1943 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1944 { 1945 PetscErrorCode ierr; 1946 1947 PetscFunctionBegin; 1948 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1954 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1955 { 1956 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1957 PetscFunctionBegin; 1958 *array = a->a; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1964 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1965 { 1966 PetscFunctionBegin; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #include "petscblaslapack.h" 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1973 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1974 { 1975 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1976 PetscErrorCode ierr; 1977 PetscInt i,bs=Y->rmap.bs,j,bs2; 1978 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1979 1980 PetscFunctionBegin; 1981 if (str == SAME_NONZERO_PATTERN) { 1982 PetscScalar alpha = a; 1983 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1984 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1985 if (y->xtoy && y->XtoY != X) { 1986 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1987 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1988 } 1989 if (!y->xtoy) { /* get xtoy */ 1990 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1991 y->XtoY = X; 1992 } 1993 bs2 = bs*bs; 1994 for (i=0; i<x->nz; i++) { 1995 j = 0; 1996 while (j < bs2){ 1997 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1998 j++; 1999 } 2000 } 2001 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2002 } else { 2003 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2004 } 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2010 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2011 { 2012 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2013 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2014 PetscScalar *aa = a->a; 2015 2016 PetscFunctionBegin; 2017 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2023 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2024 { 2025 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2026 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2027 PetscScalar *aa = a->a; 2028 2029 PetscFunctionBegin; 2030 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 2035 /* -------------------------------------------------------------------*/ 2036 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2037 MatGetRow_SeqBAIJ, 2038 MatRestoreRow_SeqBAIJ, 2039 MatMult_SeqBAIJ_N, 2040 /* 4*/ MatMultAdd_SeqBAIJ_N, 2041 MatMultTranspose_SeqBAIJ, 2042 MatMultTransposeAdd_SeqBAIJ, 2043 MatSolve_SeqBAIJ_N, 2044 0, 2045 0, 2046 /*10*/ 0, 2047 MatLUFactor_SeqBAIJ, 2048 0, 2049 0, 2050 MatTranspose_SeqBAIJ, 2051 /*15*/ MatGetInfo_SeqBAIJ, 2052 MatEqual_SeqBAIJ, 2053 MatGetDiagonal_SeqBAIJ, 2054 MatDiagonalScale_SeqBAIJ, 2055 MatNorm_SeqBAIJ, 2056 /*20*/ 0, 2057 MatAssemblyEnd_SeqBAIJ, 2058 0, 2059 MatSetOption_SeqBAIJ, 2060 MatZeroEntries_SeqBAIJ, 2061 /*25*/ MatZeroRows_SeqBAIJ, 2062 MatLUFactorSymbolic_SeqBAIJ, 2063 MatLUFactorNumeric_SeqBAIJ_N, 2064 MatCholeskyFactorSymbolic_SeqBAIJ, 2065 MatCholeskyFactorNumeric_SeqBAIJ_N, 2066 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2067 MatILUFactorSymbolic_SeqBAIJ, 2068 MatICCFactorSymbolic_SeqBAIJ, 2069 MatGetArray_SeqBAIJ, 2070 MatRestoreArray_SeqBAIJ, 2071 /*35*/ MatDuplicate_SeqBAIJ, 2072 0, 2073 0, 2074 MatILUFactor_SeqBAIJ, 2075 0, 2076 /*40*/ MatAXPY_SeqBAIJ, 2077 MatGetSubMatrices_SeqBAIJ, 2078 MatIncreaseOverlap_SeqBAIJ, 2079 MatGetValues_SeqBAIJ, 2080 MatCopy_SeqBAIJ, 2081 /*45*/ 0, 2082 MatScale_SeqBAIJ, 2083 0, 2084 0, 2085 0, 2086 /*50*/ 0, 2087 MatGetRowIJ_SeqBAIJ, 2088 MatRestoreRowIJ_SeqBAIJ, 2089 0, 2090 0, 2091 /*55*/ 0, 2092 0, 2093 0, 2094 0, 2095 MatSetValuesBlocked_SeqBAIJ, 2096 /*60*/ MatGetSubMatrix_SeqBAIJ, 2097 MatDestroy_SeqBAIJ, 2098 MatView_SeqBAIJ, 2099 0, 2100 0, 2101 /*65*/ 0, 2102 0, 2103 0, 2104 0, 2105 0, 2106 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2107 MatConvert_Basic, 2108 0, 2109 0, 2110 0, 2111 /*75*/ 0, 2112 0, 2113 0, 2114 0, 2115 0, 2116 /*80*/ 0, 2117 0, 2118 0, 2119 0, 2120 MatLoad_SeqBAIJ, 2121 /*85*/ 0, 2122 0, 2123 0, 2124 0, 2125 0, 2126 /*90*/ 0, 2127 0, 2128 0, 2129 0, 2130 0, 2131 /*95*/ 0, 2132 0, 2133 0, 2134 0, 2135 0, 2136 /*100*/0, 2137 0, 2138 0, 2139 0, 2140 0, 2141 /*105*/0, 2142 MatRealPart_SeqBAIJ, 2143 MatImaginaryPart_SeqBAIJ 2144 }; 2145 2146 EXTERN_C_BEGIN 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2149 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2150 { 2151 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2152 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2153 PetscErrorCode ierr; 2154 2155 PetscFunctionBegin; 2156 if (aij->nonew != 1) { 2157 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);first"); 2158 } 2159 2160 /* allocate space for values if not already there */ 2161 if (!aij->saved_values) { 2162 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2163 } 2164 2165 /* copy values over */ 2166 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 EXTERN_C_END 2170 2171 EXTERN_C_BEGIN 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2174 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2175 { 2176 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2177 PetscErrorCode ierr; 2178 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2179 2180 PetscFunctionBegin; 2181 if (aij->nonew != 1) { 2182 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);first"); 2183 } 2184 if (!aij->saved_values) { 2185 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2186 } 2187 2188 /* copy values over */ 2189 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 EXTERN_C_END 2193 2194 EXTERN_C_BEGIN 2195 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2196 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2197 EXTERN_C_END 2198 2199 EXTERN_C_BEGIN 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2203 { 2204 Mat_SeqBAIJ *b; 2205 PetscErrorCode ierr; 2206 PetscInt i,mbs,nbs,bs2,newbs = bs; 2207 PetscTruth flg,skipallocation = PETSC_FALSE; 2208 2209 PetscFunctionBegin; 2210 2211 if (nz == MAT_SKIP_ALLOCATION) { 2212 skipallocation = PETSC_TRUE; 2213 nz = 0; 2214 } 2215 2216 ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2217 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2218 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2219 2220 if (nnz && newbs != bs) { 2221 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2222 } 2223 bs = newbs; 2224 2225 B->rmap.bs = B->cmap.bs = bs; 2226 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2227 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2228 2229 B->preallocated = PETSC_TRUE; 2230 2231 mbs = B->rmap.n/bs; 2232 nbs = B->cmap.n/bs; 2233 bs2 = bs*bs; 2234 2235 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2236 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2237 } 2238 2239 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2240 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2241 if (nnz) { 2242 for (i=0; i<mbs; i++) { 2243 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2244 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2245 } 2246 } 2247 2248 b = (Mat_SeqBAIJ*)B->data; 2249 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2250 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2251 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2252 2253 B->ops->solve = MatSolve_SeqBAIJ_Update; 2254 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2255 if (!flg) { 2256 switch (bs) { 2257 case 1: 2258 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2259 B->ops->mult = MatMult_SeqBAIJ_1; 2260 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2261 break; 2262 case 2: 2263 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2264 B->ops->mult = MatMult_SeqBAIJ_2; 2265 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2266 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2267 break; 2268 case 3: 2269 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2270 B->ops->mult = MatMult_SeqBAIJ_3; 2271 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2272 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2273 break; 2274 case 4: 2275 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2276 B->ops->mult = MatMult_SeqBAIJ_4; 2277 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2278 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2279 break; 2280 case 5: 2281 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2282 B->ops->mult = MatMult_SeqBAIJ_5; 2283 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2284 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2285 break; 2286 case 6: 2287 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2288 B->ops->mult = MatMult_SeqBAIJ_6; 2289 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2290 break; 2291 case 7: 2292 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2293 B->ops->mult = MatMult_SeqBAIJ_7; 2294 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2295 break; 2296 default: 2297 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2298 B->ops->mult = MatMult_SeqBAIJ_N; 2299 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2300 break; 2301 } 2302 } 2303 B->rmap.bs = bs; 2304 b->mbs = mbs; 2305 b->nbs = nbs; 2306 if (!skipallocation) { 2307 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2308 /* b->ilen will count nonzeros in each block row so far. */ 2309 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2310 if (!nnz) { 2311 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2312 else if (nz <= 0) nz = 1; 2313 for (i=0; i<mbs; i++) b->imax[i] = nz; 2314 nz = nz*mbs; 2315 } else { 2316 nz = 0; 2317 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2318 } 2319 2320 /* allocate the matrix space */ 2321 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2322 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2323 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2324 b->singlemalloc = PETSC_TRUE; 2325 2326 b->i[0] = 0; 2327 for (i=1; i<mbs+1; i++) { 2328 b->i[i] = b->i[i-1] + b->imax[i-1]; 2329 } 2330 b->free_a = PETSC_TRUE; 2331 b->free_ij = PETSC_TRUE; 2332 } else { 2333 b->free_a = PETSC_FALSE; 2334 b->free_ij = PETSC_FALSE; 2335 } 2336 2337 B->rmap.bs = bs; 2338 b->bs2 = bs2; 2339 b->mbs = mbs; 2340 b->nz = 0; 2341 b->maxnz = nz*bs2; 2342 B->info.nz_unneeded = (PetscReal)b->maxnz; 2343 PetscFunctionReturn(0); 2344 } 2345 EXTERN_C_END 2346 2347 /*MC 2348 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2349 block sparse compressed row format. 2350 2351 Options Database Keys: 2352 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2353 2354 Level: beginner 2355 2356 .seealso: MatCreateSeqBAIJ() 2357 M*/ 2358 2359 EXTERN_C_BEGIN 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatCreate_SeqBAIJ" 2362 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2363 { 2364 PetscErrorCode ierr; 2365 PetscMPIInt size; 2366 Mat_SeqBAIJ *b; 2367 2368 PetscFunctionBegin; 2369 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2370 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2371 2372 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2373 B->data = (void*)b; 2374 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2375 B->factor = 0; 2376 B->mapping = 0; 2377 b->row = 0; 2378 b->col = 0; 2379 b->icol = 0; 2380 b->reallocs = 0; 2381 b->saved_values = 0; 2382 #if defined(PETSC_USE_MAT_SINGLE) 2383 b->setvalueslen = 0; 2384 b->setvaluescopy = PETSC_NULL; 2385 #endif 2386 2387 b->sorted = PETSC_FALSE; 2388 b->roworiented = PETSC_TRUE; 2389 b->nonew = 0; 2390 b->diag = 0; 2391 b->solve_work = 0; 2392 b->mult_work = 0; 2393 B->spptr = 0; 2394 B->info.nz_unneeded = (PetscReal)b->maxnz; 2395 b->keepzeroedrows = PETSC_FALSE; 2396 b->xtoy = 0; 2397 b->XtoY = 0; 2398 b->compressedrow.use = PETSC_FALSE; 2399 b->compressedrow.nrows = 0; 2400 b->compressedrow.i = PETSC_NULL; 2401 b->compressedrow.rindex = PETSC_NULL; 2402 b->compressedrow.checked = PETSC_FALSE; 2403 B->same_nonzero = PETSC_FALSE; 2404 2405 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2406 "MatInvertBlockDiagonal_SeqBAIJ", 2407 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2408 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2409 "MatStoreValues_SeqBAIJ", 2410 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2412 "MatRetrieveValues_SeqBAIJ", 2413 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2414 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2415 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2416 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2417 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2418 "MatConvert_SeqBAIJ_SeqAIJ", 2419 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2420 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2421 "MatConvert_SeqBAIJ_SeqSBAIJ", 2422 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2423 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2424 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2425 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2426 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 EXTERN_C_END 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2433 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2434 { 2435 Mat C; 2436 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2437 PetscErrorCode ierr; 2438 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2439 2440 PetscFunctionBegin; 2441 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2442 2443 *B = 0; 2444 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2445 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2446 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2447 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2448 c = (Mat_SeqBAIJ*)C->data; 2449 2450 C->rmap.N = A->rmap.N; 2451 C->cmap.N = A->cmap.N; 2452 C->rmap.bs = A->rmap.bs; 2453 c->bs2 = a->bs2; 2454 c->mbs = a->mbs; 2455 c->nbs = a->nbs; 2456 2457 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2458 for (i=0; i<mbs; i++) { 2459 c->imax[i] = a->imax[i]; 2460 c->ilen[i] = a->ilen[i]; 2461 } 2462 2463 /* allocate the matrix space */ 2464 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2465 c->singlemalloc = PETSC_TRUE; 2466 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2467 if (mbs > 0) { 2468 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2469 if (cpvalues == MAT_COPY_VALUES) { 2470 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2471 } else { 2472 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2473 } 2474 } 2475 c->sorted = a->sorted; 2476 c->roworiented = a->roworiented; 2477 c->nonew = a->nonew; 2478 2479 if (a->diag) { 2480 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2481 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2482 for (i=0; i<mbs; i++) { 2483 c->diag[i] = a->diag[i]; 2484 } 2485 } else c->diag = 0; 2486 c->nz = a->nz; 2487 c->maxnz = a->maxnz; 2488 c->solve_work = 0; 2489 c->mult_work = 0; 2490 c->free_a = PETSC_TRUE; 2491 c->free_ij = PETSC_TRUE; 2492 C->preallocated = PETSC_TRUE; 2493 C->assembled = PETSC_TRUE; 2494 2495 c->compressedrow.use = a->compressedrow.use; 2496 c->compressedrow.nrows = a->compressedrow.nrows; 2497 c->compressedrow.checked = a->compressedrow.checked; 2498 if ( a->compressedrow.checked && a->compressedrow.use){ 2499 i = a->compressedrow.nrows; 2500 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2501 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2502 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2503 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2504 } else { 2505 c->compressedrow.use = PETSC_FALSE; 2506 c->compressedrow.i = PETSC_NULL; 2507 c->compressedrow.rindex = PETSC_NULL; 2508 } 2509 C->same_nonzero = A->same_nonzero; 2510 *B = C; 2511 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2512 PetscFunctionReturn(0); 2513 } 2514 2515 #undef __FUNCT__ 2516 #define __FUNCT__ "MatLoad_SeqBAIJ" 2517 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2518 { 2519 Mat_SeqBAIJ *a; 2520 Mat B; 2521 PetscErrorCode ierr; 2522 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2523 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2524 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2525 PetscInt *masked,nmask,tmp,bs2,ishift; 2526 PetscMPIInt size; 2527 int fd; 2528 PetscScalar *aa; 2529 MPI_Comm comm = ((PetscObject)viewer)->comm; 2530 2531 PetscFunctionBegin; 2532 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2533 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2534 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2535 bs2 = bs*bs; 2536 2537 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2538 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2539 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2540 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2541 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2542 M = header[1]; N = header[2]; nz = header[3]; 2543 2544 if (header[3] < 0) { 2545 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2546 } 2547 2548 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2549 2550 /* 2551 This code adds extra rows to make sure the number of rows is 2552 divisible by the blocksize 2553 */ 2554 mbs = M/bs; 2555 extra_rows = bs - M + bs*(mbs); 2556 if (extra_rows == bs) extra_rows = 0; 2557 else mbs++; 2558 if (extra_rows) { 2559 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2560 } 2561 2562 /* read in row lengths */ 2563 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2564 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2565 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2566 2567 /* read in column indices */ 2568 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2569 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2570 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2571 2572 /* loop over row lengths determining block row lengths */ 2573 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2574 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2575 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2576 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2577 masked = mask + mbs; 2578 rowcount = 0; nzcount = 0; 2579 for (i=0; i<mbs; i++) { 2580 nmask = 0; 2581 for (j=0; j<bs; j++) { 2582 kmax = rowlengths[rowcount]; 2583 for (k=0; k<kmax; k++) { 2584 tmp = jj[nzcount++]/bs; 2585 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2586 } 2587 rowcount++; 2588 } 2589 browlengths[i] += nmask; 2590 /* zero out the mask elements we set */ 2591 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2592 } 2593 2594 /* create our matrix */ 2595 ierr = MatCreate(comm,&B); 2596 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2597 ierr = MatSetType(B,type);CHKERRQ(ierr); 2598 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2599 a = (Mat_SeqBAIJ*)B->data; 2600 2601 /* set matrix "i" values */ 2602 a->i[0] = 0; 2603 for (i=1; i<= mbs; i++) { 2604 a->i[i] = a->i[i-1] + browlengths[i-1]; 2605 a->ilen[i-1] = browlengths[i-1]; 2606 } 2607 a->nz = 0; 2608 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2609 2610 /* read in nonzero values */ 2611 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2612 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2613 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2614 2615 /* set "a" and "j" values into matrix */ 2616 nzcount = 0; jcount = 0; 2617 for (i=0; i<mbs; i++) { 2618 nzcountb = nzcount; 2619 nmask = 0; 2620 for (j=0; j<bs; j++) { 2621 kmax = rowlengths[i*bs+j]; 2622 for (k=0; k<kmax; k++) { 2623 tmp = jj[nzcount++]/bs; 2624 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2625 } 2626 } 2627 /* sort the masked values */ 2628 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2629 2630 /* set "j" values into matrix */ 2631 maskcount = 1; 2632 for (j=0; j<nmask; j++) { 2633 a->j[jcount++] = masked[j]; 2634 mask[masked[j]] = maskcount++; 2635 } 2636 /* set "a" values into matrix */ 2637 ishift = bs2*a->i[i]; 2638 for (j=0; j<bs; j++) { 2639 kmax = rowlengths[i*bs+j]; 2640 for (k=0; k<kmax; k++) { 2641 tmp = jj[nzcountb]/bs ; 2642 block = mask[tmp] - 1; 2643 point = jj[nzcountb] - bs*tmp; 2644 idx = ishift + bs2*block + j + bs*point; 2645 a->a[idx] = (MatScalar)aa[nzcountb++]; 2646 } 2647 } 2648 /* zero out the mask elements we set */ 2649 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2650 } 2651 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2652 2653 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2654 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2655 ierr = PetscFree(aa);CHKERRQ(ierr); 2656 ierr = PetscFree(jj);CHKERRQ(ierr); 2657 ierr = PetscFree(mask);CHKERRQ(ierr); 2658 2659 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2660 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2661 ierr = MatView_Private(B);CHKERRQ(ierr); 2662 2663 *A = B; 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatCreateSeqBAIJ" 2669 /*@C 2670 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2671 compressed row) format. For good matrix assembly performance the 2672 user should preallocate the matrix storage by setting the parameter nz 2673 (or the array nnz). By setting these parameters accurately, performance 2674 during matrix assembly can be increased by more than a factor of 50. 2675 2676 Collective on MPI_Comm 2677 2678 Input Parameters: 2679 + comm - MPI communicator, set to PETSC_COMM_SELF 2680 . bs - size of block 2681 . m - number of rows 2682 . n - number of columns 2683 . nz - number of nonzero blocks per block row (same for all rows) 2684 - nnz - array containing the number of nonzero blocks in the various block rows 2685 (possibly different for each block row) or PETSC_NULL 2686 2687 Output Parameter: 2688 . A - the matrix 2689 2690 Options Database Keys: 2691 . -mat_no_unroll - uses code that does not unroll the loops in the 2692 block calculations (much slower) 2693 . -mat_block_size - size of the blocks to use 2694 2695 Level: intermediate 2696 2697 Notes: 2698 The number of rows and columns must be divisible by blocksize. 2699 2700 If the nnz parameter is given then the nz parameter is ignored 2701 2702 A nonzero block is any block that as 1 or more nonzeros in it 2703 2704 The block AIJ format is fully compatible with standard Fortran 77 2705 storage. That is, the stored row and column indices can begin at 2706 either one (as in Fortran) or zero. See the users' manual for details. 2707 2708 Specify the preallocated storage with either nz or nnz (not both). 2709 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2710 allocation. For additional details, see the users manual chapter on 2711 matrices. 2712 2713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2714 @*/ 2715 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2716 { 2717 PetscErrorCode ierr; 2718 2719 PetscFunctionBegin; 2720 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2721 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2722 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2723 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2729 /*@C 2730 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2731 per row in the matrix. For good matrix assembly performance the 2732 user should preallocate the matrix storage by setting the parameter nz 2733 (or the array nnz). By setting these parameters accurately, performance 2734 during matrix assembly can be increased by more than a factor of 50. 2735 2736 Collective on MPI_Comm 2737 2738 Input Parameters: 2739 + A - the matrix 2740 . bs - size of block 2741 . nz - number of block nonzeros per block row (same for all rows) 2742 - nnz - array containing the number of block nonzeros in the various block rows 2743 (possibly different for each block row) or PETSC_NULL 2744 2745 Options Database Keys: 2746 . -mat_no_unroll - uses code that does not unroll the loops in the 2747 block calculations (much slower) 2748 . -mat_block_size - size of the blocks to use 2749 2750 Level: intermediate 2751 2752 Notes: 2753 If the nnz parameter is given then the nz parameter is ignored 2754 2755 The block AIJ format is fully compatible with standard Fortran 77 2756 storage. That is, the stored row and column indices can begin at 2757 either one (as in Fortran) or zero. See the users' manual for details. 2758 2759 Specify the preallocated storage with either nz or nnz (not both). 2760 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2761 allocation. For additional details, see the users manual chapter on 2762 matrices. 2763 2764 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2765 @*/ 2766 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2767 { 2768 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2769 2770 PetscFunctionBegin; 2771 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2772 if (f) { 2773 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2774 } 2775 PetscFunctionReturn(0); 2776 } 2777 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2780 /*@ 2781 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2782 (upper triangular entries in CSR format) provided by the user. 2783 2784 Collective on MPI_Comm 2785 2786 Input Parameters: 2787 + comm - must be an MPI communicator of size 1 2788 . bs - size of block 2789 . m - number of rows 2790 . n - number of columns 2791 . i - row indices 2792 . j - column indices 2793 - a - matrix values 2794 2795 Output Parameter: 2796 . mat - the matrix 2797 2798 Level: intermediate 2799 2800 Notes: 2801 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2802 once the matrix is destroyed 2803 2804 You cannot set new nonzero locations into this matrix, that will generate an error. 2805 2806 The i and j indices are 0 based 2807 2808 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2809 2810 @*/ 2811 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2812 { 2813 PetscErrorCode ierr; 2814 PetscInt ii; 2815 Mat_SeqBAIJ *baij; 2816 2817 PetscFunctionBegin; 2818 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2819 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2820 2821 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2822 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2823 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2824 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2825 baij = (Mat_SeqBAIJ*)(*mat)->data; 2826 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2827 2828 baij->i = i; 2829 baij->j = j; 2830 baij->a = a; 2831 baij->singlemalloc = PETSC_FALSE; 2832 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2833 baij->free_a = PETSC_FALSE; 2834 baij->free_ij = PETSC_FALSE; 2835 2836 for (ii=0; ii<m; ii++) { 2837 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2838 #if defined(PETSC_USE_DEBUG) 2839 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2840 #endif 2841 } 2842 #if defined(PETSC_USE_DEBUG) 2843 for (ii=0; ii<baij->i[m]; ii++) { 2844 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2845 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2846 } 2847 #endif 2848 2849 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2850 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2851 PetscFunctionReturn(0); 2852 } 2853