1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 *missing = PETSC_FALSE; 743 diag = a->diag; 744 for (i=0; i<a->mbs; i++) { 745 if (jj[diag[i]] != i) { 746 *missing = PETSC_TRUE; 747 if (d) *d = i; 748 } 749 } 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 755 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 756 { 757 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,m = a->mbs; 760 761 PetscFunctionBegin; 762 if (!a->diag) { 763 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 764 } 765 for (i=0; i<m; i++) { 766 a->diag[i] = a->i[i+1]; 767 for (j=a->i[i]; j<a->i[i+1]; j++) { 768 if (a->j[j] == i) { 769 a->diag[i] = j; 770 break; 771 } 772 } 773 } 774 PetscFunctionReturn(0); 775 } 776 777 778 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 779 780 #undef __FUNCT__ 781 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 782 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 783 { 784 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 785 PetscErrorCode ierr; 786 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 787 PetscInt *tia, *tja; 788 789 PetscFunctionBegin; 790 *nn = n; 791 if (!ia) PetscFunctionReturn(0); 792 if (symmetric) { 793 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 794 } else { 795 tia = a->i; tja = a->j; 796 } 797 798 if (!blockcompressed) { 799 /* malloc & create the natural set of indices */ 800 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 801 for (i=0; i<n+1; i++) { 802 for (j=0; j<bs; j++) { 803 *ia[i*bs+j] = tia[i]*bs+j; 804 } 805 } 806 for (i=0; i<nz; i++) { 807 for (j=0; j<bs; j++) { 808 *ja[i*bs+j] = tia[i]*bs+j; 809 } 810 } 811 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 812 ierr = PetscFree(tia);CHKERRQ(ierr); 813 ierr = PetscFree(tja);CHKERRQ(ierr); 814 } 815 } else { 816 *ia = tia; 817 *ja = tja; 818 } 819 if (oshift == 1) { 820 for (i=0; i<nz; i++) (*ja)[i]++; 821 for (i=0; i<n+1; i++) (*ia)[i]++; 822 } 823 PetscFunctionReturn(0); 824 } 825 826 #undef __FUNCT__ 827 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 828 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 829 { 830 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 831 PetscErrorCode ierr; 832 PetscInt i,n = a->mbs,nz = a->i[n]; 833 834 PetscFunctionBegin; 835 if (!ia) PetscFunctionReturn(0); 836 if (!blockcompressed) { 837 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 838 }else if (symmetric) { 839 ierr = PetscFree(*ia);CHKERRQ(ierr); 840 ierr = PetscFree(*ja);CHKERRQ(ierr); 841 } else if (oshift == 1) { /* blockcompressed */ 842 for (i=0; i<nz; i++) a->j[i]--; 843 for (i=0; i<n+1; i++) a->i[i]--; 844 } 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatDestroy_SeqBAIJ" 850 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 851 { 852 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 853 PetscErrorCode ierr; 854 855 PetscFunctionBegin; 856 #if defined(PETSC_USE_LOG) 857 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 858 #endif 859 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 860 if (a->row) { 861 ierr = ISDestroy(a->row);CHKERRQ(ierr); 862 } 863 if (a->col) { 864 ierr = ISDestroy(a->col);CHKERRQ(ierr); 865 } 866 ierr = PetscFree(a->diag);CHKERRQ(ierr); 867 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 868 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 869 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 870 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 871 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 872 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 873 #if defined(PETSC_USE_MAT_SINGLE) 874 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 875 #endif 876 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 877 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 878 879 ierr = PetscFree(a);CHKERRQ(ierr); 880 881 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 888 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 #undef __FUNCT__ 893 #define __FUNCT__ "MatSetOption_SeqBAIJ" 894 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 895 { 896 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 switch (op) { 901 case MAT_ROW_ORIENTED: 902 a->roworiented = flg; 903 break; 904 case MAT_KEEP_ZEROED_ROWS: 905 a->keepzeroedrows = flg; 906 break; 907 case MAT_NEW_NONZERO_LOCATIONS: 908 a->nonew = (flg ? 0 : 1); 909 break; 910 case MAT_NEW_NONZERO_LOCATION_ERR: 911 a->nonew = (flg ? -1 : 0); 912 break; 913 case MAT_NEW_NONZERO_ALLOCATION_ERR: 914 a->nonew = (flg ? -2 : 0); 915 break; 916 case MAT_NEW_DIAGONALS: 917 case MAT_IGNORE_OFF_PROC_ENTRIES: 918 case MAT_USE_HASH_TABLE: 919 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 920 break; 921 case MAT_SYMMETRIC: 922 case MAT_STRUCTURALLY_SYMMETRIC: 923 case MAT_HERMITIAN: 924 case MAT_SYMMETRY_ETERNAL: 925 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 926 break; 927 default: 928 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatGetRow_SeqBAIJ" 935 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 936 { 937 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 938 PetscErrorCode ierr; 939 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 940 MatScalar *aa,*aa_i; 941 PetscScalar *v_i; 942 943 PetscFunctionBegin; 944 bs = A->rmap.bs; 945 ai = a->i; 946 aj = a->j; 947 aa = a->a; 948 bs2 = a->bs2; 949 950 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 951 952 bn = row/bs; /* Block number */ 953 bp = row % bs; /* Block Position */ 954 M = ai[bn+1] - ai[bn]; 955 *nz = bs*M; 956 957 if (v) { 958 *v = 0; 959 if (*nz) { 960 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 961 for (i=0; i<M; i++) { /* for each block in the block row */ 962 v_i = *v + i*bs; 963 aa_i = aa + bs2*(ai[bn] + i); 964 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 965 } 966 } 967 } 968 969 if (idx) { 970 *idx = 0; 971 if (*nz) { 972 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 973 for (i=0; i<M; i++) { /* for each block in the block row */ 974 idx_i = *idx + i*bs; 975 itmp = bs*aj[ai[bn] + i]; 976 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 977 } 978 } 979 } 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 985 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 986 { 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 991 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 992 PetscFunctionReturn(0); 993 } 994 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatTranspose_SeqBAIJ" 997 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 998 { 999 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1000 Mat C; 1001 PetscErrorCode ierr; 1002 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1003 PetscInt *rows,*cols,bs2=a->bs2; 1004 PetscScalar *array; 1005 1006 PetscFunctionBegin; 1007 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1008 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1009 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1010 1011 #if defined(PETSC_USE_MAT_SINGLE) 1012 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1013 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1014 #else 1015 array = a->a; 1016 #endif 1017 1018 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1019 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1020 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1021 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1022 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1023 ierr = PetscFree(col);CHKERRQ(ierr); 1024 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1025 cols = rows + bs; 1026 for (i=0; i<mbs; i++) { 1027 cols[0] = i*bs; 1028 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1029 len = ai[i+1] - ai[i]; 1030 for (j=0; j<len; j++) { 1031 rows[0] = (*aj++)*bs; 1032 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1033 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1034 array += bs2; 1035 } 1036 } 1037 ierr = PetscFree(rows);CHKERRQ(ierr); 1038 #if defined(PETSC_USE_MAT_SINGLE) 1039 ierr = PetscFree(array); 1040 #endif 1041 1042 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1044 1045 if (B) { 1046 *B = C; 1047 } else { 1048 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1049 } 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1055 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1056 { 1057 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1058 PetscErrorCode ierr; 1059 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1060 int fd; 1061 PetscScalar *aa; 1062 FILE *file; 1063 1064 PetscFunctionBegin; 1065 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1066 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1067 col_lens[0] = MAT_FILE_COOKIE; 1068 1069 col_lens[1] = A->rmap.N; 1070 col_lens[2] = A->cmap.n; 1071 col_lens[3] = a->nz*bs2; 1072 1073 /* store lengths of each row and write (including header) to file */ 1074 count = 0; 1075 for (i=0; i<a->mbs; i++) { 1076 for (j=0; j<bs; j++) { 1077 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1078 } 1079 } 1080 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1081 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1082 1083 /* store column indices (zero start index) */ 1084 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1085 count = 0; 1086 for (i=0; i<a->mbs; i++) { 1087 for (j=0; j<bs; j++) { 1088 for (k=a->i[i]; k<a->i[i+1]; k++) { 1089 for (l=0; l<bs; l++) { 1090 jj[count++] = bs*a->j[k] + l; 1091 } 1092 } 1093 } 1094 } 1095 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1096 ierr = PetscFree(jj);CHKERRQ(ierr); 1097 1098 /* store nonzero values */ 1099 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1100 count = 0; 1101 for (i=0; i<a->mbs; i++) { 1102 for (j=0; j<bs; j++) { 1103 for (k=a->i[i]; k<a->i[i+1]; k++) { 1104 for (l=0; l<bs; l++) { 1105 aa[count++] = a->a[bs2*k + l*bs + j]; 1106 } 1107 } 1108 } 1109 } 1110 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1111 ierr = PetscFree(aa);CHKERRQ(ierr); 1112 1113 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1114 if (file) { 1115 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1116 } 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1122 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1123 { 1124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1127 PetscViewerFormat format; 1128 1129 PetscFunctionBegin; 1130 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1131 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1132 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1133 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1134 Mat aij; 1135 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1136 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1137 ierr = MatDestroy(aij);CHKERRQ(ierr); 1138 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1139 PetscFunctionReturn(0); 1140 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1141 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1142 for (i=0; i<a->mbs; i++) { 1143 for (j=0; j<bs; j++) { 1144 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1145 for (k=a->i[i]; k<a->i[i+1]; k++) { 1146 for (l=0; l<bs; l++) { 1147 #if defined(PETSC_USE_COMPLEX) 1148 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1149 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1150 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1151 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1152 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1153 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1154 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1155 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1156 } 1157 #else 1158 if (a->a[bs2*k + l*bs + j] != 0.0) { 1159 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1160 } 1161 #endif 1162 } 1163 } 1164 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1165 } 1166 } 1167 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1168 } else { 1169 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1170 for (i=0; i<a->mbs; i++) { 1171 for (j=0; j<bs; j++) { 1172 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1173 for (k=a->i[i]; k<a->i[i+1]; k++) { 1174 for (l=0; l<bs; l++) { 1175 #if defined(PETSC_USE_COMPLEX) 1176 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1177 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1178 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1179 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1180 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1181 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1182 } else { 1183 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1184 } 1185 #else 1186 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1187 #endif 1188 } 1189 } 1190 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1191 } 1192 } 1193 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1194 } 1195 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1201 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1202 { 1203 Mat A = (Mat) Aa; 1204 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1205 PetscErrorCode ierr; 1206 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1207 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1208 MatScalar *aa; 1209 PetscViewer viewer; 1210 1211 PetscFunctionBegin; 1212 1213 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1214 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1215 1216 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1217 1218 /* loop over matrix elements drawing boxes */ 1219 color = PETSC_DRAW_BLUE; 1220 for (i=0,row=0; i<mbs; i++,row+=bs) { 1221 for (j=a->i[i]; j<a->i[i+1]; j++) { 1222 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1223 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1224 aa = a->a + j*bs2; 1225 for (k=0; k<bs; k++) { 1226 for (l=0; l<bs; l++) { 1227 if (PetscRealPart(*aa++) >= 0.) continue; 1228 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1229 } 1230 } 1231 } 1232 } 1233 color = PETSC_DRAW_CYAN; 1234 for (i=0,row=0; i<mbs; i++,row+=bs) { 1235 for (j=a->i[i]; j<a->i[i+1]; j++) { 1236 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1237 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1238 aa = a->a + j*bs2; 1239 for (k=0; k<bs; k++) { 1240 for (l=0; l<bs; l++) { 1241 if (PetscRealPart(*aa++) != 0.) continue; 1242 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1243 } 1244 } 1245 } 1246 } 1247 1248 color = PETSC_DRAW_RED; 1249 for (i=0,row=0; i<mbs; i++,row+=bs) { 1250 for (j=a->i[i]; j<a->i[i+1]; j++) { 1251 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1252 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1253 aa = a->a + j*bs2; 1254 for (k=0; k<bs; k++) { 1255 for (l=0; l<bs; l++) { 1256 if (PetscRealPart(*aa++) <= 0.) continue; 1257 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1258 } 1259 } 1260 } 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1267 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1268 { 1269 PetscErrorCode ierr; 1270 PetscReal xl,yl,xr,yr,w,h; 1271 PetscDraw draw; 1272 PetscTruth isnull; 1273 1274 PetscFunctionBegin; 1275 1276 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1277 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1278 1279 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1280 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1281 xr += w; yr += h; xl = -w; yl = -h; 1282 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1283 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1284 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_SeqBAIJ" 1290 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1291 { 1292 PetscErrorCode ierr; 1293 PetscTruth iascii,isbinary,isdraw; 1294 1295 PetscFunctionBegin; 1296 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1297 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1298 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1299 if (iascii){ 1300 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1301 } else if (isbinary) { 1302 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1303 } else if (isdraw) { 1304 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1305 } else { 1306 Mat B; 1307 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1308 ierr = MatView(B,viewer);CHKERRQ(ierr); 1309 ierr = MatDestroy(B);CHKERRQ(ierr); 1310 } 1311 PetscFunctionReturn(0); 1312 } 1313 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1317 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1318 { 1319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1320 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1321 PetscInt *ai = a->i,*ailen = a->ilen; 1322 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1323 MatScalar *ap,*aa = a->a; 1324 1325 PetscFunctionBegin; 1326 for (k=0; k<m; k++) { /* loop over rows */ 1327 row = im[k]; brow = row/bs; 1328 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1329 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1330 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1331 nrow = ailen[brow]; 1332 for (l=0; l<n; l++) { /* loop over columns */ 1333 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1334 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1335 col = in[l] ; 1336 bcol = col/bs; 1337 cidx = col%bs; 1338 ridx = row%bs; 1339 high = nrow; 1340 low = 0; /* assume unsorted */ 1341 while (high-low > 5) { 1342 t = (low+high)/2; 1343 if (rp[t] > bcol) high = t; 1344 else low = t; 1345 } 1346 for (i=low; i<high; i++) { 1347 if (rp[i] > bcol) break; 1348 if (rp[i] == bcol) { 1349 *v++ = ap[bs2*i+bs*cidx+ridx]; 1350 goto finished; 1351 } 1352 } 1353 *v++ = 0.0; 1354 finished:; 1355 } 1356 } 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #if defined(PETSC_USE_MAT_SINGLE) 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1363 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1364 { 1365 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1366 PetscErrorCode ierr; 1367 PetscInt i,N = m*n*b->bs2; 1368 MatScalar *vsingle; 1369 1370 PetscFunctionBegin; 1371 if (N > b->setvalueslen) { 1372 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1373 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1374 b->setvalueslen = N; 1375 } 1376 vsingle = b->setvaluescopy; 1377 for (i=0; i<N; i++) { 1378 vsingle[i] = v[i]; 1379 } 1380 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1381 PetscFunctionReturn(0); 1382 } 1383 #endif 1384 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1388 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1389 { 1390 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1391 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1392 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1393 PetscErrorCode ierr; 1394 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1395 PetscTruth roworiented=a->roworiented; 1396 const MatScalar *value = v; 1397 MatScalar *ap,*aa = a->a,*bap; 1398 1399 PetscFunctionBegin; 1400 if (roworiented) { 1401 stepval = (n-1)*bs; 1402 } else { 1403 stepval = (m-1)*bs; 1404 } 1405 for (k=0; k<m; k++) { /* loop over added rows */ 1406 row = im[k]; 1407 if (row < 0) continue; 1408 #if defined(PETSC_USE_DEBUG) 1409 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1410 #endif 1411 rp = aj + ai[row]; 1412 ap = aa + bs2*ai[row]; 1413 rmax = imax[row]; 1414 nrow = ailen[row]; 1415 low = 0; 1416 high = nrow; 1417 for (l=0; l<n; l++) { /* loop over added columns */ 1418 if (in[l] < 0) continue; 1419 #if defined(PETSC_USE_DEBUG) 1420 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1421 #endif 1422 col = in[l]; 1423 if (roworiented) { 1424 value = v + k*(stepval+bs)*bs + l*bs; 1425 } else { 1426 value = v + l*(stepval+bs)*bs + k*bs; 1427 } 1428 if (col <= lastcol) low = 0; else high = nrow; 1429 lastcol = col; 1430 while (high-low > 7) { 1431 t = (low+high)/2; 1432 if (rp[t] > col) high = t; 1433 else low = t; 1434 } 1435 for (i=low; i<high; i++) { 1436 if (rp[i] > col) break; 1437 if (rp[i] == col) { 1438 bap = ap + bs2*i; 1439 if (roworiented) { 1440 if (is == ADD_VALUES) { 1441 for (ii=0; ii<bs; ii++,value+=stepval) { 1442 for (jj=ii; jj<bs2; jj+=bs) { 1443 bap[jj] += *value++; 1444 } 1445 } 1446 } else { 1447 for (ii=0; ii<bs; ii++,value+=stepval) { 1448 for (jj=ii; jj<bs2; jj+=bs) { 1449 bap[jj] = *value++; 1450 } 1451 } 1452 } 1453 } else { 1454 if (is == ADD_VALUES) { 1455 for (ii=0; ii<bs; ii++,value+=stepval) { 1456 for (jj=0; jj<bs; jj++) { 1457 *bap++ += *value++; 1458 } 1459 } 1460 } else { 1461 for (ii=0; ii<bs; ii++,value+=stepval) { 1462 for (jj=0; jj<bs; jj++) { 1463 *bap++ = *value++; 1464 } 1465 } 1466 } 1467 } 1468 goto noinsert2; 1469 } 1470 } 1471 if (nonew == 1) goto noinsert2; 1472 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1473 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1474 N = nrow++ - 1; high++; 1475 /* shift up all the later entries in this row */ 1476 for (ii=N; ii>=i; ii--) { 1477 rp[ii+1] = rp[ii]; 1478 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1479 } 1480 if (N >= i) { 1481 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1482 } 1483 rp[i] = col; 1484 bap = ap + bs2*i; 1485 if (roworiented) { 1486 for (ii=0; ii<bs; ii++,value+=stepval) { 1487 for (jj=ii; jj<bs2; jj+=bs) { 1488 bap[jj] = *value++; 1489 } 1490 } 1491 } else { 1492 for (ii=0; ii<bs; ii++,value+=stepval) { 1493 for (jj=0; jj<bs; jj++) { 1494 *bap++ = *value++; 1495 } 1496 } 1497 } 1498 noinsert2:; 1499 low = i; 1500 } 1501 ailen[row] = nrow; 1502 } 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1508 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1509 { 1510 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1511 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1512 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1513 PetscErrorCode ierr; 1514 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1515 MatScalar *aa = a->a,*ap; 1516 PetscReal ratio=0.6; 1517 1518 PetscFunctionBegin; 1519 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1520 1521 if (m) rmax = ailen[0]; 1522 for (i=1; i<mbs; i++) { 1523 /* move each row back by the amount of empty slots (fshift) before it*/ 1524 fshift += imax[i-1] - ailen[i-1]; 1525 rmax = PetscMax(rmax,ailen[i]); 1526 if (fshift) { 1527 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1528 N = ailen[i]; 1529 for (j=0; j<N; j++) { 1530 ip[j-fshift] = ip[j]; 1531 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1532 } 1533 } 1534 ai[i] = ai[i-1] + ailen[i-1]; 1535 } 1536 if (mbs) { 1537 fshift += imax[mbs-1] - ailen[mbs-1]; 1538 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1539 } 1540 /* reset ilen and imax for each row */ 1541 for (i=0; i<mbs; i++) { 1542 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1543 } 1544 a->nz = ai[mbs]; 1545 1546 /* diagonals may have moved, so kill the diagonal pointers */ 1547 a->idiagvalid = PETSC_FALSE; 1548 if (fshift && a->diag) { 1549 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1550 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1551 a->diag = 0; 1552 } 1553 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1554 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1555 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1556 a->reallocs = 0; 1557 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1558 1559 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1560 if (a->compressedrow.use){ 1561 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1562 } 1563 1564 A->same_nonzero = PETSC_TRUE; 1565 PetscFunctionReturn(0); 1566 } 1567 1568 /* 1569 This function returns an array of flags which indicate the locations of contiguous 1570 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1571 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1572 Assume: sizes should be long enough to hold all the values. 1573 */ 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1576 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1577 { 1578 PetscInt i,j,k,row; 1579 PetscTruth flg; 1580 1581 PetscFunctionBegin; 1582 for (i=0,j=0; i<n; j++) { 1583 row = idx[i]; 1584 if (row%bs!=0) { /* Not the begining of a block */ 1585 sizes[j] = 1; 1586 i++; 1587 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1588 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1589 i++; 1590 } else { /* Begining of the block, so check if the complete block exists */ 1591 flg = PETSC_TRUE; 1592 for (k=1; k<bs; k++) { 1593 if (row+k != idx[i+k]) { /* break in the block */ 1594 flg = PETSC_FALSE; 1595 break; 1596 } 1597 } 1598 if (flg) { /* No break in the bs */ 1599 sizes[j] = bs; 1600 i+= bs; 1601 } else { 1602 sizes[j] = 1; 1603 i++; 1604 } 1605 } 1606 } 1607 *bs_max = j; 1608 PetscFunctionReturn(0); 1609 } 1610 1611 #undef __FUNCT__ 1612 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1613 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1614 { 1615 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1616 PetscErrorCode ierr; 1617 PetscInt i,j,k,count,*rows; 1618 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1619 PetscScalar zero = 0.0; 1620 MatScalar *aa; 1621 1622 PetscFunctionBegin; 1623 /* Make a copy of the IS and sort it */ 1624 /* allocate memory for rows,sizes */ 1625 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1626 sizes = rows + is_n; 1627 1628 /* copy IS values to rows, and sort them */ 1629 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1630 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1631 if (baij->keepzeroedrows) { 1632 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1633 bs_max = is_n; 1634 A->same_nonzero = PETSC_TRUE; 1635 } else { 1636 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1637 A->same_nonzero = PETSC_FALSE; 1638 } 1639 1640 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1641 row = rows[j]; 1642 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1643 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1644 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1645 if (sizes[i] == bs && !baij->keepzeroedrows) { 1646 if (diag != 0.0) { 1647 if (baij->ilen[row/bs] > 0) { 1648 baij->ilen[row/bs] = 1; 1649 baij->j[baij->i[row/bs]] = row/bs; 1650 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1651 } 1652 /* Now insert all the diagonal values for this bs */ 1653 for (k=0; k<bs; k++) { 1654 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1655 } 1656 } else { /* (diag == 0.0) */ 1657 baij->ilen[row/bs] = 0; 1658 } /* end (diag == 0.0) */ 1659 } else { /* (sizes[i] != bs) */ 1660 #if defined (PETSC_USE_DEBUG) 1661 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1662 #endif 1663 for (k=0; k<count; k++) { 1664 aa[0] = zero; 1665 aa += bs; 1666 } 1667 if (diag != 0.0) { 1668 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1669 } 1670 } 1671 } 1672 1673 ierr = PetscFree(rows);CHKERRQ(ierr); 1674 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1680 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1681 { 1682 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1683 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1684 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1685 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1686 PetscErrorCode ierr; 1687 PetscInt ridx,cidx,bs2=a->bs2; 1688 PetscTruth roworiented=a->roworiented; 1689 MatScalar *ap,value,*aa=a->a,*bap; 1690 1691 PetscFunctionBegin; 1692 for (k=0; k<m; k++) { /* loop over added rows */ 1693 row = im[k]; 1694 brow = row/bs; 1695 if (row < 0) continue; 1696 #if defined(PETSC_USE_DEBUG) 1697 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1698 #endif 1699 rp = aj + ai[brow]; 1700 ap = aa + bs2*ai[brow]; 1701 rmax = imax[brow]; 1702 nrow = ailen[brow]; 1703 low = 0; 1704 high = nrow; 1705 for (l=0; l<n; l++) { /* loop over added columns */ 1706 if (in[l] < 0) continue; 1707 #if defined(PETSC_USE_DEBUG) 1708 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1709 #endif 1710 col = in[l]; bcol = col/bs; 1711 ridx = row % bs; cidx = col % bs; 1712 if (roworiented) { 1713 value = v[l + k*n]; 1714 } else { 1715 value = v[k + l*m]; 1716 } 1717 if (col <= lastcol) low = 0; else high = nrow; 1718 lastcol = col; 1719 while (high-low > 7) { 1720 t = (low+high)/2; 1721 if (rp[t] > bcol) high = t; 1722 else low = t; 1723 } 1724 for (i=low; i<high; i++) { 1725 if (rp[i] > bcol) break; 1726 if (rp[i] == bcol) { 1727 bap = ap + bs2*i + bs*cidx + ridx; 1728 if (is == ADD_VALUES) *bap += value; 1729 else *bap = value; 1730 goto noinsert1; 1731 } 1732 } 1733 if (nonew == 1) goto noinsert1; 1734 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1735 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1736 N = nrow++ - 1; high++; 1737 /* shift up all the later entries in this row */ 1738 for (ii=N; ii>=i; ii--) { 1739 rp[ii+1] = rp[ii]; 1740 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1741 } 1742 if (N>=i) { 1743 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1744 } 1745 rp[i] = bcol; 1746 ap[bs2*i + bs*cidx + ridx] = value; 1747 a->nz++; 1748 noinsert1:; 1749 low = i; 1750 } 1751 ailen[brow] = nrow; 1752 } 1753 A->same_nonzero = PETSC_FALSE; 1754 PetscFunctionReturn(0); 1755 } 1756 1757 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1760 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1761 { 1762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1763 Mat outA; 1764 PetscErrorCode ierr; 1765 PetscTruth row_identity,col_identity; 1766 1767 PetscFunctionBegin; 1768 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1769 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1770 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1771 if (!row_identity || !col_identity) { 1772 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1773 } 1774 1775 outA = inA; 1776 inA->factor = FACTOR_LU; 1777 1778 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1779 1780 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1781 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1782 a->row = row; 1783 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1784 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1785 a->col = col; 1786 1787 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1788 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1789 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1790 1791 /* 1792 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1793 for ILU(0) factorization with natural ordering 1794 */ 1795 if (inA->rmap.bs < 8) { 1796 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1797 } else { 1798 if (!a->solve_work) { 1799 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1800 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1801 } 1802 } 1803 1804 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1805 1806 PetscFunctionReturn(0); 1807 } 1808 1809 EXTERN_C_BEGIN 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1812 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1813 { 1814 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1815 PetscInt i,nz,nbs; 1816 1817 PetscFunctionBegin; 1818 nz = baij->maxnz/baij->bs2; 1819 nbs = baij->nbs; 1820 for (i=0; i<nz; i++) { 1821 baij->j[i] = indices[i]; 1822 } 1823 baij->nz = nz; 1824 for (i=0; i<nbs; i++) { 1825 baij->ilen[i] = baij->imax[i]; 1826 } 1827 1828 PetscFunctionReturn(0); 1829 } 1830 EXTERN_C_END 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1834 /*@ 1835 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1836 in the matrix. 1837 1838 Input Parameters: 1839 + mat - the SeqBAIJ matrix 1840 - indices - the column indices 1841 1842 Level: advanced 1843 1844 Notes: 1845 This can be called if you have precomputed the nonzero structure of the 1846 matrix and want to provide it to the matrix object to improve the performance 1847 of the MatSetValues() operation. 1848 1849 You MUST have set the correct numbers of nonzeros per row in the call to 1850 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1851 1852 MUST be called before any calls to MatSetValues(); 1853 1854 @*/ 1855 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1856 { 1857 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1858 1859 PetscFunctionBegin; 1860 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1861 PetscValidPointer(indices,2); 1862 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1863 if (f) { 1864 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1865 } else { 1866 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1873 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1874 { 1875 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1876 PetscErrorCode ierr; 1877 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1878 PetscReal atmp; 1879 PetscScalar *x,zero = 0.0; 1880 MatScalar *aa; 1881 PetscInt ncols,brow,krow,kcol; 1882 1883 PetscFunctionBegin; 1884 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1885 bs = A->rmap.bs; 1886 aa = a->a; 1887 ai = a->i; 1888 aj = a->j; 1889 mbs = a->mbs; 1890 1891 ierr = VecSet(v,zero);CHKERRQ(ierr); 1892 if (idx) { 1893 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1894 } 1895 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1896 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1897 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1898 for (i=0; i<mbs; i++) { 1899 ncols = ai[1] - ai[0]; ai++; 1900 brow = bs*i; 1901 for (j=0; j<ncols; j++){ 1902 for (kcol=0; kcol<bs; kcol++){ 1903 for (krow=0; krow<bs; krow++){ 1904 atmp = PetscAbsScalar(*aa);aa++; 1905 row = brow + krow; /* row index */ 1906 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1907 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1908 } 1909 } 1910 aj++; 1911 } 1912 } 1913 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatCopy_SeqBAIJ" 1919 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1920 { 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 /* If the two matrices have the same copy implementation, use fast copy. */ 1925 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1926 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1927 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1928 1929 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1930 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1931 } 1932 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1933 } else { 1934 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1935 } 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1941 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1942 { 1943 PetscErrorCode ierr; 1944 1945 PetscFunctionBegin; 1946 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1952 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1953 { 1954 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1955 PetscFunctionBegin; 1956 *array = a->a; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1962 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1963 { 1964 PetscFunctionBegin; 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #include "petscblaslapack.h" 1969 #undef __FUNCT__ 1970 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1971 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1972 { 1973 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1974 PetscErrorCode ierr; 1975 PetscInt i,bs=Y->rmap.bs,j,bs2; 1976 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1977 1978 PetscFunctionBegin; 1979 if (str == SAME_NONZERO_PATTERN) { 1980 PetscScalar alpha = a; 1981 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1982 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1983 if (y->xtoy && y->XtoY != X) { 1984 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1985 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1986 } 1987 if (!y->xtoy) { /* get xtoy */ 1988 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1989 y->XtoY = X; 1990 } 1991 bs2 = bs*bs; 1992 for (i=0; i<x->nz; i++) { 1993 j = 0; 1994 while (j < bs2){ 1995 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1996 j++; 1997 } 1998 } 1999 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2000 } else { 2001 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2002 } 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2008 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2009 { 2010 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2011 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2012 PetscScalar *aa = a->a; 2013 2014 PetscFunctionBegin; 2015 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2021 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2022 { 2023 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2024 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2025 PetscScalar *aa = a->a; 2026 2027 PetscFunctionBegin; 2028 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 2033 /* -------------------------------------------------------------------*/ 2034 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2035 MatGetRow_SeqBAIJ, 2036 MatRestoreRow_SeqBAIJ, 2037 MatMult_SeqBAIJ_N, 2038 /* 4*/ MatMultAdd_SeqBAIJ_N, 2039 MatMultTranspose_SeqBAIJ, 2040 MatMultTransposeAdd_SeqBAIJ, 2041 MatSolve_SeqBAIJ_N, 2042 0, 2043 0, 2044 /*10*/ 0, 2045 MatLUFactor_SeqBAIJ, 2046 0, 2047 0, 2048 MatTranspose_SeqBAIJ, 2049 /*15*/ MatGetInfo_SeqBAIJ, 2050 MatEqual_SeqBAIJ, 2051 MatGetDiagonal_SeqBAIJ, 2052 MatDiagonalScale_SeqBAIJ, 2053 MatNorm_SeqBAIJ, 2054 /*20*/ 0, 2055 MatAssemblyEnd_SeqBAIJ, 2056 0, 2057 MatSetOption_SeqBAIJ, 2058 MatZeroEntries_SeqBAIJ, 2059 /*25*/ MatZeroRows_SeqBAIJ, 2060 MatLUFactorSymbolic_SeqBAIJ, 2061 MatLUFactorNumeric_SeqBAIJ_N, 2062 MatCholeskyFactorSymbolic_SeqBAIJ, 2063 MatCholeskyFactorNumeric_SeqBAIJ_N, 2064 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2065 MatILUFactorSymbolic_SeqBAIJ, 2066 MatICCFactorSymbolic_SeqBAIJ, 2067 MatGetArray_SeqBAIJ, 2068 MatRestoreArray_SeqBAIJ, 2069 /*35*/ MatDuplicate_SeqBAIJ, 2070 0, 2071 0, 2072 MatILUFactor_SeqBAIJ, 2073 0, 2074 /*40*/ MatAXPY_SeqBAIJ, 2075 MatGetSubMatrices_SeqBAIJ, 2076 MatIncreaseOverlap_SeqBAIJ, 2077 MatGetValues_SeqBAIJ, 2078 MatCopy_SeqBAIJ, 2079 /*45*/ 0, 2080 MatScale_SeqBAIJ, 2081 0, 2082 0, 2083 0, 2084 /*50*/ 0, 2085 MatGetRowIJ_SeqBAIJ, 2086 MatRestoreRowIJ_SeqBAIJ, 2087 0, 2088 0, 2089 /*55*/ 0, 2090 0, 2091 0, 2092 0, 2093 MatSetValuesBlocked_SeqBAIJ, 2094 /*60*/ MatGetSubMatrix_SeqBAIJ, 2095 MatDestroy_SeqBAIJ, 2096 MatView_SeqBAIJ, 2097 0, 2098 0, 2099 /*65*/ 0, 2100 0, 2101 0, 2102 0, 2103 0, 2104 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2105 MatConvert_Basic, 2106 0, 2107 0, 2108 0, 2109 /*75*/ 0, 2110 0, 2111 0, 2112 0, 2113 0, 2114 /*80*/ 0, 2115 0, 2116 0, 2117 0, 2118 MatLoad_SeqBAIJ, 2119 /*85*/ 0, 2120 0, 2121 0, 2122 0, 2123 0, 2124 /*90*/ 0, 2125 0, 2126 0, 2127 0, 2128 0, 2129 /*95*/ 0, 2130 0, 2131 0, 2132 0, 2133 0, 2134 /*100*/0, 2135 0, 2136 0, 2137 0, 2138 0, 2139 /*105*/0, 2140 MatRealPart_SeqBAIJ, 2141 MatImaginaryPart_SeqBAIJ, 2142 0, 2143 0, 2144 /*110*/0, 2145 0, 2146 0, 2147 0, 2148 MatMissingDiagonal_SeqBAIJ 2149 /*115*/ 2150 }; 2151 2152 EXTERN_C_BEGIN 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2155 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2156 { 2157 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2158 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2159 PetscErrorCode ierr; 2160 2161 PetscFunctionBegin; 2162 if (aij->nonew != 1) { 2163 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2164 } 2165 2166 /* allocate space for values if not already there */ 2167 if (!aij->saved_values) { 2168 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2169 } 2170 2171 /* copy values over */ 2172 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 EXTERN_C_END 2176 2177 EXTERN_C_BEGIN 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2180 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2181 { 2182 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2183 PetscErrorCode ierr; 2184 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2185 2186 PetscFunctionBegin; 2187 if (aij->nonew != 1) { 2188 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2189 } 2190 if (!aij->saved_values) { 2191 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2192 } 2193 2194 /* copy values over */ 2195 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2196 PetscFunctionReturn(0); 2197 } 2198 EXTERN_C_END 2199 2200 EXTERN_C_BEGIN 2201 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2202 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2203 EXTERN_C_END 2204 2205 EXTERN_C_BEGIN 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2208 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2209 { 2210 Mat_SeqBAIJ *b; 2211 PetscErrorCode ierr; 2212 PetscInt i,mbs,nbs,bs2,newbs = bs; 2213 PetscTruth flg,skipallocation = PETSC_FALSE; 2214 2215 PetscFunctionBegin; 2216 2217 if (nz == MAT_SKIP_ALLOCATION) { 2218 skipallocation = PETSC_TRUE; 2219 nz = 0; 2220 } 2221 2222 ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2223 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2224 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2225 2226 if (nnz && newbs != bs) { 2227 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2228 } 2229 bs = newbs; 2230 2231 B->rmap.bs = B->cmap.bs = bs; 2232 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2233 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2234 2235 B->preallocated = PETSC_TRUE; 2236 2237 mbs = B->rmap.n/bs; 2238 nbs = B->cmap.n/bs; 2239 bs2 = bs*bs; 2240 2241 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2242 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2243 } 2244 2245 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2246 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2247 if (nnz) { 2248 for (i=0; i<mbs; i++) { 2249 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2250 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2251 } 2252 } 2253 2254 b = (Mat_SeqBAIJ*)B->data; 2255 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2256 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2257 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2258 2259 B->ops->solve = MatSolve_SeqBAIJ_Update; 2260 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2261 if (!flg) { 2262 switch (bs) { 2263 case 1: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2265 B->ops->mult = MatMult_SeqBAIJ_1; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2267 break; 2268 case 2: 2269 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2270 B->ops->mult = MatMult_SeqBAIJ_2; 2271 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2272 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2273 break; 2274 case 3: 2275 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2276 B->ops->mult = MatMult_SeqBAIJ_3; 2277 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2278 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2279 break; 2280 case 4: 2281 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2282 B->ops->mult = MatMult_SeqBAIJ_4; 2283 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2284 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2285 break; 2286 case 5: 2287 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2288 B->ops->mult = MatMult_SeqBAIJ_5; 2289 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2290 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2291 break; 2292 case 6: 2293 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2294 B->ops->mult = MatMult_SeqBAIJ_6; 2295 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2296 break; 2297 case 7: 2298 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2299 B->ops->mult = MatMult_SeqBAIJ_7; 2300 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2301 break; 2302 default: 2303 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2304 B->ops->mult = MatMult_SeqBAIJ_N; 2305 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2306 break; 2307 } 2308 } 2309 B->rmap.bs = bs; 2310 b->mbs = mbs; 2311 b->nbs = nbs; 2312 if (!skipallocation) { 2313 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2314 /* b->ilen will count nonzeros in each block row so far. */ 2315 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2316 if (!nnz) { 2317 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2318 else if (nz <= 0) nz = 1; 2319 for (i=0; i<mbs; i++) b->imax[i] = nz; 2320 nz = nz*mbs; 2321 } else { 2322 nz = 0; 2323 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2324 } 2325 2326 /* allocate the matrix space */ 2327 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2328 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2329 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2330 b->singlemalloc = PETSC_TRUE; 2331 2332 b->i[0] = 0; 2333 for (i=1; i<mbs+1; i++) { 2334 b->i[i] = b->i[i-1] + b->imax[i-1]; 2335 } 2336 b->free_a = PETSC_TRUE; 2337 b->free_ij = PETSC_TRUE; 2338 } else { 2339 b->free_a = PETSC_FALSE; 2340 b->free_ij = PETSC_FALSE; 2341 } 2342 2343 B->rmap.bs = bs; 2344 b->bs2 = bs2; 2345 b->mbs = mbs; 2346 b->nz = 0; 2347 b->maxnz = nz*bs2; 2348 B->info.nz_unneeded = (PetscReal)b->maxnz; 2349 PetscFunctionReturn(0); 2350 } 2351 EXTERN_C_END 2352 2353 /*MC 2354 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2355 block sparse compressed row format. 2356 2357 Options Database Keys: 2358 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2359 2360 Level: beginner 2361 2362 .seealso: MatCreateSeqBAIJ() 2363 M*/ 2364 2365 EXTERN_C_BEGIN 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatCreate_SeqBAIJ" 2368 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2369 { 2370 PetscErrorCode ierr; 2371 PetscMPIInt size; 2372 Mat_SeqBAIJ *b; 2373 2374 PetscFunctionBegin; 2375 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2376 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2377 2378 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2379 B->data = (void*)b; 2380 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2381 B->factor = 0; 2382 B->mapping = 0; 2383 b->row = 0; 2384 b->col = 0; 2385 b->icol = 0; 2386 b->reallocs = 0; 2387 b->saved_values = 0; 2388 #if defined(PETSC_USE_MAT_SINGLE) 2389 b->setvalueslen = 0; 2390 b->setvaluescopy = PETSC_NULL; 2391 #endif 2392 2393 b->roworiented = PETSC_TRUE; 2394 b->nonew = 0; 2395 b->diag = 0; 2396 b->solve_work = 0; 2397 b->mult_work = 0; 2398 B->spptr = 0; 2399 B->info.nz_unneeded = (PetscReal)b->maxnz; 2400 b->keepzeroedrows = PETSC_FALSE; 2401 b->xtoy = 0; 2402 b->XtoY = 0; 2403 b->compressedrow.use = PETSC_FALSE; 2404 b->compressedrow.nrows = 0; 2405 b->compressedrow.i = PETSC_NULL; 2406 b->compressedrow.rindex = PETSC_NULL; 2407 b->compressedrow.checked = PETSC_FALSE; 2408 B->same_nonzero = PETSC_FALSE; 2409 2410 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2411 "MatInvertBlockDiagonal_SeqBAIJ", 2412 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2414 "MatStoreValues_SeqBAIJ", 2415 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2416 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2417 "MatRetrieveValues_SeqBAIJ", 2418 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2419 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2420 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2421 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2422 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2423 "MatConvert_SeqBAIJ_SeqAIJ", 2424 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2425 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2426 "MatConvert_SeqBAIJ_SeqSBAIJ", 2427 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2428 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2429 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2430 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2431 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 EXTERN_C_END 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2438 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2439 { 2440 Mat C; 2441 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2442 PetscErrorCode ierr; 2443 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2444 2445 PetscFunctionBegin; 2446 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2447 2448 *B = 0; 2449 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2450 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2451 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2452 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2453 c = (Mat_SeqBAIJ*)C->data; 2454 2455 C->rmap.N = A->rmap.N; 2456 C->cmap.N = A->cmap.N; 2457 C->rmap.bs = A->rmap.bs; 2458 c->bs2 = a->bs2; 2459 c->mbs = a->mbs; 2460 c->nbs = a->nbs; 2461 2462 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2463 for (i=0; i<mbs; i++) { 2464 c->imax[i] = a->imax[i]; 2465 c->ilen[i] = a->ilen[i]; 2466 } 2467 2468 /* allocate the matrix space */ 2469 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2470 c->singlemalloc = PETSC_TRUE; 2471 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2472 if (mbs > 0) { 2473 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2474 if (cpvalues == MAT_COPY_VALUES) { 2475 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2476 } else { 2477 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2478 } 2479 } 2480 c->roworiented = a->roworiented; 2481 c->nonew = a->nonew; 2482 2483 if (a->diag) { 2484 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2485 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2486 for (i=0; i<mbs; i++) { 2487 c->diag[i] = a->diag[i]; 2488 } 2489 } else c->diag = 0; 2490 c->nz = a->nz; 2491 c->maxnz = a->maxnz; 2492 c->solve_work = 0; 2493 c->mult_work = 0; 2494 c->free_a = PETSC_TRUE; 2495 c->free_ij = PETSC_TRUE; 2496 C->preallocated = PETSC_TRUE; 2497 C->assembled = PETSC_TRUE; 2498 2499 c->compressedrow.use = a->compressedrow.use; 2500 c->compressedrow.nrows = a->compressedrow.nrows; 2501 c->compressedrow.checked = a->compressedrow.checked; 2502 if ( a->compressedrow.checked && a->compressedrow.use){ 2503 i = a->compressedrow.nrows; 2504 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2505 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2506 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2507 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2508 } else { 2509 c->compressedrow.use = PETSC_FALSE; 2510 c->compressedrow.i = PETSC_NULL; 2511 c->compressedrow.rindex = PETSC_NULL; 2512 } 2513 C->same_nonzero = A->same_nonzero; 2514 *B = C; 2515 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatLoad_SeqBAIJ" 2521 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2522 { 2523 Mat_SeqBAIJ *a; 2524 Mat B; 2525 PetscErrorCode ierr; 2526 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2527 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2528 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2529 PetscInt *masked,nmask,tmp,bs2,ishift; 2530 PetscMPIInt size; 2531 int fd; 2532 PetscScalar *aa; 2533 MPI_Comm comm = ((PetscObject)viewer)->comm; 2534 2535 PetscFunctionBegin; 2536 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2537 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2538 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2539 bs2 = bs*bs; 2540 2541 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2542 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2543 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2544 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2545 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2546 M = header[1]; N = header[2]; nz = header[3]; 2547 2548 if (header[3] < 0) { 2549 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2550 } 2551 2552 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2553 2554 /* 2555 This code adds extra rows to make sure the number of rows is 2556 divisible by the blocksize 2557 */ 2558 mbs = M/bs; 2559 extra_rows = bs - M + bs*(mbs); 2560 if (extra_rows == bs) extra_rows = 0; 2561 else mbs++; 2562 if (extra_rows) { 2563 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2564 } 2565 2566 /* read in row lengths */ 2567 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2568 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2569 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2570 2571 /* read in column indices */ 2572 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2573 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2574 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2575 2576 /* loop over row lengths determining block row lengths */ 2577 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2578 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2579 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2580 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2581 masked = mask + mbs; 2582 rowcount = 0; nzcount = 0; 2583 for (i=0; i<mbs; i++) { 2584 nmask = 0; 2585 for (j=0; j<bs; j++) { 2586 kmax = rowlengths[rowcount]; 2587 for (k=0; k<kmax; k++) { 2588 tmp = jj[nzcount++]/bs; 2589 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2590 } 2591 rowcount++; 2592 } 2593 browlengths[i] += nmask; 2594 /* zero out the mask elements we set */ 2595 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2596 } 2597 2598 /* create our matrix */ 2599 ierr = MatCreate(comm,&B); 2600 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2601 ierr = MatSetType(B,type);CHKERRQ(ierr); 2602 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2603 a = (Mat_SeqBAIJ*)B->data; 2604 2605 /* set matrix "i" values */ 2606 a->i[0] = 0; 2607 for (i=1; i<= mbs; i++) { 2608 a->i[i] = a->i[i-1] + browlengths[i-1]; 2609 a->ilen[i-1] = browlengths[i-1]; 2610 } 2611 a->nz = 0; 2612 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2613 2614 /* read in nonzero values */ 2615 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2616 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2617 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2618 2619 /* set "a" and "j" values into matrix */ 2620 nzcount = 0; jcount = 0; 2621 for (i=0; i<mbs; i++) { 2622 nzcountb = nzcount; 2623 nmask = 0; 2624 for (j=0; j<bs; j++) { 2625 kmax = rowlengths[i*bs+j]; 2626 for (k=0; k<kmax; k++) { 2627 tmp = jj[nzcount++]/bs; 2628 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2629 } 2630 } 2631 /* sort the masked values */ 2632 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2633 2634 /* set "j" values into matrix */ 2635 maskcount = 1; 2636 for (j=0; j<nmask; j++) { 2637 a->j[jcount++] = masked[j]; 2638 mask[masked[j]] = maskcount++; 2639 } 2640 /* set "a" values into matrix */ 2641 ishift = bs2*a->i[i]; 2642 for (j=0; j<bs; j++) { 2643 kmax = rowlengths[i*bs+j]; 2644 for (k=0; k<kmax; k++) { 2645 tmp = jj[nzcountb]/bs ; 2646 block = mask[tmp] - 1; 2647 point = jj[nzcountb] - bs*tmp; 2648 idx = ishift + bs2*block + j + bs*point; 2649 a->a[idx] = (MatScalar)aa[nzcountb++]; 2650 } 2651 } 2652 /* zero out the mask elements we set */ 2653 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2654 } 2655 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2656 2657 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2658 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2659 ierr = PetscFree(aa);CHKERRQ(ierr); 2660 ierr = PetscFree(jj);CHKERRQ(ierr); 2661 ierr = PetscFree(mask);CHKERRQ(ierr); 2662 2663 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2664 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2665 ierr = MatView_Private(B);CHKERRQ(ierr); 2666 2667 *A = B; 2668 PetscFunctionReturn(0); 2669 } 2670 2671 #undef __FUNCT__ 2672 #define __FUNCT__ "MatCreateSeqBAIJ" 2673 /*@C 2674 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2675 compressed row) format. For good matrix assembly performance the 2676 user should preallocate the matrix storage by setting the parameter nz 2677 (or the array nnz). By setting these parameters accurately, performance 2678 during matrix assembly can be increased by more than a factor of 50. 2679 2680 Collective on MPI_Comm 2681 2682 Input Parameters: 2683 + comm - MPI communicator, set to PETSC_COMM_SELF 2684 . bs - size of block 2685 . m - number of rows 2686 . n - number of columns 2687 . nz - number of nonzero blocks per block row (same for all rows) 2688 - nnz - array containing the number of nonzero blocks in the various block rows 2689 (possibly different for each block row) or PETSC_NULL 2690 2691 Output Parameter: 2692 . A - the matrix 2693 2694 Options Database Keys: 2695 . -mat_no_unroll - uses code that does not unroll the loops in the 2696 block calculations (much slower) 2697 . -mat_block_size - size of the blocks to use 2698 2699 Level: intermediate 2700 2701 Notes: 2702 The number of rows and columns must be divisible by blocksize. 2703 2704 If the nnz parameter is given then the nz parameter is ignored 2705 2706 A nonzero block is any block that as 1 or more nonzeros in it 2707 2708 The block AIJ format is fully compatible with standard Fortran 77 2709 storage. That is, the stored row and column indices can begin at 2710 either one (as in Fortran) or zero. See the users' manual for details. 2711 2712 Specify the preallocated storage with either nz or nnz (not both). 2713 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2714 allocation. For additional details, see the users manual chapter on 2715 matrices. 2716 2717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2718 @*/ 2719 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2720 { 2721 PetscErrorCode ierr; 2722 2723 PetscFunctionBegin; 2724 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2725 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2726 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2727 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2728 PetscFunctionReturn(0); 2729 } 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2733 /*@C 2734 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2735 per row in the matrix. For good matrix assembly performance the 2736 user should preallocate the matrix storage by setting the parameter nz 2737 (or the array nnz). By setting these parameters accurately, performance 2738 during matrix assembly can be increased by more than a factor of 50. 2739 2740 Collective on MPI_Comm 2741 2742 Input Parameters: 2743 + A - the matrix 2744 . bs - size of block 2745 . nz - number of block nonzeros per block row (same for all rows) 2746 - nnz - array containing the number of block nonzeros in the various block rows 2747 (possibly different for each block row) or PETSC_NULL 2748 2749 Options Database Keys: 2750 . -mat_no_unroll - uses code that does not unroll the loops in the 2751 block calculations (much slower) 2752 . -mat_block_size - size of the blocks to use 2753 2754 Level: intermediate 2755 2756 Notes: 2757 If the nnz parameter is given then the nz parameter is ignored 2758 2759 The block AIJ format is fully compatible with standard Fortran 77 2760 storage. That is, the stored row and column indices can begin at 2761 either one (as in Fortran) or zero. See the users' manual for details. 2762 2763 Specify the preallocated storage with either nz or nnz (not both). 2764 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2765 allocation. For additional details, see the users manual chapter on 2766 matrices. 2767 2768 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2769 @*/ 2770 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2771 { 2772 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2773 2774 PetscFunctionBegin; 2775 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2776 if (f) { 2777 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2778 } 2779 PetscFunctionReturn(0); 2780 } 2781 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2784 /*@ 2785 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2786 (upper triangular entries in CSR format) provided by the user. 2787 2788 Collective on MPI_Comm 2789 2790 Input Parameters: 2791 + comm - must be an MPI communicator of size 1 2792 . bs - size of block 2793 . m - number of rows 2794 . n - number of columns 2795 . i - row indices 2796 . j - column indices 2797 - a - matrix values 2798 2799 Output Parameter: 2800 . mat - the matrix 2801 2802 Level: intermediate 2803 2804 Notes: 2805 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2806 once the matrix is destroyed 2807 2808 You cannot set new nonzero locations into this matrix, that will generate an error. 2809 2810 The i and j indices are 0 based 2811 2812 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2813 2814 @*/ 2815 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2816 { 2817 PetscErrorCode ierr; 2818 PetscInt ii; 2819 Mat_SeqBAIJ *baij; 2820 2821 PetscFunctionBegin; 2822 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2823 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2824 2825 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2826 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2827 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2828 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2829 baij = (Mat_SeqBAIJ*)(*mat)->data; 2830 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2831 2832 baij->i = i; 2833 baij->j = j; 2834 baij->a = a; 2835 baij->singlemalloc = PETSC_FALSE; 2836 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2837 baij->free_a = PETSC_FALSE; 2838 baij->free_ij = PETSC_FALSE; 2839 2840 for (ii=0; ii<m; ii++) { 2841 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2842 #if defined(PETSC_USE_DEBUG) 2843 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2844 #endif 2845 } 2846 #if defined(PETSC_USE_DEBUG) 2847 for (ii=0; ii<baij->i[m]; ii++) { 2848 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2849 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2850 } 2851 #endif 2852 2853 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857