1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for Fun3d sequential benchmark 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 stepval = (n-1)*4; 590 for (k=0; k<m; k++) { /* loop over added rows */ 591 row = im[k]; 592 rp = aj + ai[row]; 593 ap = aa + 16*ai[row]; 594 nrow = ailen[row]; 595 low = 0; 596 for (l=0; l<n; l++) { /* loop over added columns */ 597 col = in[l]; 598 value = v + k*(stepval+4)*4 + l*4; 599 low = 0; high = nrow; 600 while (high-low > 7) { 601 t = (low+high)/2; 602 if (rp[t] > col) high = t; 603 else low = t; 604 } 605 for (i=low; i<high; i++) { 606 if (rp[i] > col) break; 607 if (rp[i] == col) { 608 bap = ap + 16*i; 609 for (ii=0; ii<4; ii++,value+=stepval) { 610 for (jj=ii; jj<16; jj+=4) { 611 bap[jj] += *value++; 612 } 613 } 614 goto noinsert2; 615 } 616 } 617 N = nrow++ - 1; 618 /* shift up all the later entries in this row */ 619 for (ii=N; ii>=i; ii--) { 620 rp[ii+1] = rp[ii]; 621 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 622 } 623 if (N >= i) { 624 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 625 } 626 rp[i] = col; 627 bap = ap + 16*i; 628 for (ii=0; ii<4; ii++,value+=stepval) { 629 for (jj=ii; jj<16; jj+=4) { 630 bap[jj] = *value++; 631 } 632 } 633 noinsert2:; 634 low = i; 635 } 636 ailen[row] = nrow; 637 } 638 PetscFunctionReturnVoid(); 639 } 640 EXTERN_C_END 641 642 #if defined(PETSC_HAVE_FORTRAN_CAPS) 643 #define matsetvalues4_ MATSETVALUES4 644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 645 #define matsetvalues4_ matsetvalues4 646 #endif 647 648 EXTERN_C_BEGIN 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSetValues4_" 651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 652 { 653 Mat A = *AA; 654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 655 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 656 PetscInt *ai=a->i,*ailen=a->ilen; 657 PetscInt *aj=a->j,brow,bcol; 658 PetscInt ridx,cidx; 659 MatScalar *ap,value,*aa=a->a,*bap; 660 661 PetscFunctionBegin; 662 for (k=0; k<m; k++) { /* loop over added rows */ 663 row = im[k]; brow = row/4; 664 rp = aj + ai[brow]; 665 ap = aa + 16*ai[brow]; 666 nrow = ailen[brow]; 667 low = 0; 668 for (l=0; l<n; l++) { /* loop over added columns */ 669 col = in[l]; bcol = col/4; 670 ridx = row % 4; cidx = col % 4; 671 value = v[l + k*n]; 672 low = 0; high = nrow; 673 while (high-low > 7) { 674 t = (low+high)/2; 675 if (rp[t] > bcol) high = t; 676 else low = t; 677 } 678 for (i=low; i<high; i++) { 679 if (rp[i] > bcol) break; 680 if (rp[i] == bcol) { 681 bap = ap + 16*i + 4*cidx + ridx; 682 *bap += value; 683 goto noinsert1; 684 } 685 } 686 N = nrow++ - 1; 687 /* shift up all the later entries in this row */ 688 for (ii=N; ii>=i; ii--) { 689 rp[ii+1] = rp[ii]; 690 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 691 } 692 if (N>=i) { 693 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 694 } 695 rp[i] = bcol; 696 ap[16*i + 4*cidx + ridx] = value; 697 noinsert1:; 698 low = i; 699 } 700 ailen[brow] = nrow; 701 } 702 PetscFunctionReturnVoid(); 703 } 704 EXTERN_C_END 705 706 /* UGLY, ugly, ugly 707 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 708 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 709 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 710 converts the entries into single precision and then calls ..._MatScalar() to put them 711 into the single precision data structures. 712 */ 713 #if defined(PETSC_USE_MAT_SINGLE) 714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 715 #else 716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 717 #endif 718 719 #define CHUNKSIZE 10 720 721 /* 722 Checks for missing diagonals 723 */ 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 727 { 728 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 729 PetscErrorCode ierr; 730 PetscInt *diag,*jj = a->j,i; 731 732 PetscFunctionBegin; 733 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 734 diag = a->diag; 735 for (i=0; i<a->mbs; i++) { 736 if (jj[diag[i]] != i) { 737 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 738 } 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 746 { 747 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscInt i,j,*diag,m = a->mbs; 750 751 PetscFunctionBegin; 752 if (a->diag) PetscFunctionReturn(0); 753 754 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 755 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 756 for (i=0; i<m; i++) { 757 diag[i] = a->i[i+1]; 758 for (j=a->i[i]; j<a->i[i+1]; j++) { 759 if (a->j[j] == i) { 760 diag[i] = j; 761 break; 762 } 763 } 764 } 765 a->diag = diag; 766 PetscFunctionReturn(0); 767 } 768 769 770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 775 { 776 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 777 PetscErrorCode ierr; 778 PetscInt n = a->mbs,i; 779 780 PetscFunctionBegin; 781 *nn = n; 782 if (!ia) PetscFunctionReturn(0); 783 if (symmetric) { 784 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 785 } else if (oshift == 1) { 786 /* temporarily add 1 to i and j indices */ 787 PetscInt nz = a->i[n]; 788 for (i=0; i<nz; i++) a->j[i]++; 789 for (i=0; i<n+1; i++) a->i[i]++; 790 *ia = a->i; *ja = a->j; 791 } else { 792 *ia = a->i; *ja = a->j; 793 } 794 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 801 { 802 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 803 PetscErrorCode ierr; 804 PetscInt i,n = a->mbs; 805 806 PetscFunctionBegin; 807 if (!ia) PetscFunctionReturn(0); 808 if (symmetric) { 809 ierr = PetscFree(*ia);CHKERRQ(ierr); 810 ierr = PetscFree(*ja);CHKERRQ(ierr); 811 } else if (oshift == 1) { 812 PetscInt nz = a->i[n]-1; 813 for (i=0; i<nz; i++) a->j[i]--; 814 for (i=0; i<n+1; i++) a->i[i]--; 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatDestroy_SeqBAIJ" 821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 822 { 823 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 #if defined(PETSC_USE_LOG) 828 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 829 #endif 830 ierr = PetscFree(a->a);CHKERRQ(ierr); 831 if (!a->singlemalloc) { 832 ierr = PetscFree(a->i);CHKERRQ(ierr); 833 ierr = PetscFree(a->j);CHKERRQ(ierr); 834 } 835 if (a->row) { 836 ierr = ISDestroy(a->row);CHKERRQ(ierr); 837 } 838 if (a->col) { 839 ierr = ISDestroy(a->col);CHKERRQ(ierr); 840 } 841 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 842 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 843 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 844 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 845 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 846 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 847 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 848 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 849 #if defined(PETSC_USE_MAT_SINGLE) 850 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 851 #endif 852 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 853 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 854 855 ierr = PetscFree(a);CHKERRQ(ierr); 856 857 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatSetOption_SeqBAIJ" 869 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 870 { 871 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 872 PetscErrorCode ierr; 873 874 PetscFunctionBegin; 875 switch (op) { 876 case MAT_ROW_ORIENTED: 877 a->roworiented = PETSC_TRUE; 878 break; 879 case MAT_COLUMN_ORIENTED: 880 a->roworiented = PETSC_FALSE; 881 break; 882 case MAT_COLUMNS_SORTED: 883 a->sorted = PETSC_TRUE; 884 break; 885 case MAT_COLUMNS_UNSORTED: 886 a->sorted = PETSC_FALSE; 887 break; 888 case MAT_KEEP_ZEROED_ROWS: 889 a->keepzeroedrows = PETSC_TRUE; 890 break; 891 case MAT_NO_NEW_NONZERO_LOCATIONS: 892 a->nonew = 1; 893 break; 894 case MAT_NEW_NONZERO_LOCATION_ERR: 895 a->nonew = -1; 896 break; 897 case MAT_NEW_NONZERO_ALLOCATION_ERR: 898 a->nonew = -2; 899 break; 900 case MAT_YES_NEW_NONZERO_LOCATIONS: 901 a->nonew = 0; 902 break; 903 case MAT_ROWS_SORTED: 904 case MAT_ROWS_UNSORTED: 905 case MAT_YES_NEW_DIAGONALS: 906 case MAT_IGNORE_OFF_PROC_ENTRIES: 907 case MAT_USE_HASH_TABLE: 908 ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 909 break; 910 case MAT_NO_NEW_DIAGONALS: 911 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 912 case MAT_SYMMETRIC: 913 case MAT_STRUCTURALLY_SYMMETRIC: 914 case MAT_NOT_SYMMETRIC: 915 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 916 case MAT_HERMITIAN: 917 case MAT_NOT_HERMITIAN: 918 case MAT_SYMMETRY_ETERNAL: 919 case MAT_NOT_SYMMETRY_ETERNAL: 920 break; 921 default: 922 SETERRQ(PETSC_ERR_SUP,"unknown option"); 923 } 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatGetRow_SeqBAIJ" 929 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 930 { 931 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 932 PetscErrorCode ierr; 933 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 934 MatScalar *aa,*aa_i; 935 PetscScalar *v_i; 936 937 PetscFunctionBegin; 938 bs = A->bs; 939 ai = a->i; 940 aj = a->j; 941 aa = a->a; 942 bs2 = a->bs2; 943 944 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 945 946 bn = row/bs; /* Block number */ 947 bp = row % bs; /* Block Position */ 948 M = ai[bn+1] - ai[bn]; 949 *nz = bs*M; 950 951 if (v) { 952 *v = 0; 953 if (*nz) { 954 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 955 for (i=0; i<M; i++) { /* for each block in the block row */ 956 v_i = *v + i*bs; 957 aa_i = aa + bs2*(ai[bn] + i); 958 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 959 } 960 } 961 } 962 963 if (idx) { 964 *idx = 0; 965 if (*nz) { 966 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 967 for (i=0; i<M; i++) { /* for each block in the block row */ 968 idx_i = *idx + i*bs; 969 itmp = bs*aj[ai[bn] + i]; 970 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 971 } 972 } 973 } 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 979 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 980 { 981 PetscErrorCode ierr; 982 983 PetscFunctionBegin; 984 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 985 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatTranspose_SeqBAIJ" 991 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 992 { 993 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 994 Mat C; 995 PetscErrorCode ierr; 996 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 997 PetscInt *rows,*cols,bs2=a->bs2; 998 PetscScalar *array; 999 1000 PetscFunctionBegin; 1001 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1002 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1003 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1004 1005 #if defined(PETSC_USE_MAT_SINGLE) 1006 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1007 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1008 #else 1009 array = a->a; 1010 #endif 1011 1012 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1013 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 1014 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1015 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1016 ierr = PetscFree(col);CHKERRQ(ierr); 1017 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1018 cols = rows + bs; 1019 for (i=0; i<mbs; i++) { 1020 cols[0] = i*bs; 1021 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1022 len = ai[i+1] - ai[i]; 1023 for (j=0; j<len; j++) { 1024 rows[0] = (*aj++)*bs; 1025 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1026 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1027 array += bs2; 1028 } 1029 } 1030 ierr = PetscFree(rows);CHKERRQ(ierr); 1031 #if defined(PETSC_USE_MAT_SINGLE) 1032 ierr = PetscFree(array); 1033 #endif 1034 1035 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1036 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1037 1038 if (B) { 1039 *B = C; 1040 } else { 1041 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1042 } 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1048 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1049 { 1050 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1051 PetscErrorCode ierr; 1052 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1053 int fd; 1054 PetscScalar *aa; 1055 FILE *file; 1056 1057 PetscFunctionBegin; 1058 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1059 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1060 col_lens[0] = MAT_FILE_COOKIE; 1061 1062 col_lens[1] = A->m; 1063 col_lens[2] = A->n; 1064 col_lens[3] = a->nz*bs2; 1065 1066 /* store lengths of each row and write (including header) to file */ 1067 count = 0; 1068 for (i=0; i<a->mbs; i++) { 1069 for (j=0; j<bs; j++) { 1070 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1071 } 1072 } 1073 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1074 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1075 1076 /* store column indices (zero start index) */ 1077 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1078 count = 0; 1079 for (i=0; i<a->mbs; i++) { 1080 for (j=0; j<bs; j++) { 1081 for (k=a->i[i]; k<a->i[i+1]; k++) { 1082 for (l=0; l<bs; l++) { 1083 jj[count++] = bs*a->j[k] + l; 1084 } 1085 } 1086 } 1087 } 1088 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1089 ierr = PetscFree(jj);CHKERRQ(ierr); 1090 1091 /* store nonzero values */ 1092 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1093 count = 0; 1094 for (i=0; i<a->mbs; i++) { 1095 for (j=0; j<bs; j++) { 1096 for (k=a->i[i]; k<a->i[i+1]; k++) { 1097 for (l=0; l<bs; l++) { 1098 aa[count++] = a->a[bs2*k + l*bs + j]; 1099 } 1100 } 1101 } 1102 } 1103 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1104 ierr = PetscFree(aa);CHKERRQ(ierr); 1105 1106 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1107 if (file) { 1108 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1109 } 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1115 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1116 { 1117 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1118 PetscErrorCode ierr; 1119 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1120 PetscViewerFormat format; 1121 1122 PetscFunctionBegin; 1123 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1124 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1125 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1126 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1127 Mat aij; 1128 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1129 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1130 ierr = MatDestroy(aij);CHKERRQ(ierr); 1131 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1132 PetscFunctionReturn(0); 1133 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1134 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1135 for (i=0; i<a->mbs; i++) { 1136 for (j=0; j<bs; j++) { 1137 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1138 for (k=a->i[i]; k<a->i[i+1]; k++) { 1139 for (l=0; l<bs; l++) { 1140 #if defined(PETSC_USE_COMPLEX) 1141 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1142 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1143 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1144 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1145 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1146 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1147 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1148 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } 1150 #else 1151 if (a->a[bs2*k + l*bs + j] != 0.0) { 1152 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1153 } 1154 #endif 1155 } 1156 } 1157 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1158 } 1159 } 1160 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1161 } else { 1162 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1163 for (i=0; i<a->mbs; i++) { 1164 for (j=0; j<bs; j++) { 1165 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1166 for (k=a->i[i]; k<a->i[i+1]; k++) { 1167 for (l=0; l<bs; l++) { 1168 #if defined(PETSC_USE_COMPLEX) 1169 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1170 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1171 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1172 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1173 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1174 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1175 } else { 1176 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1177 } 1178 #else 1179 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1180 #endif 1181 } 1182 } 1183 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1184 } 1185 } 1186 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1187 } 1188 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1194 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1195 { 1196 Mat A = (Mat) Aa; 1197 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1198 PetscErrorCode ierr; 1199 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1200 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1201 MatScalar *aa; 1202 PetscViewer viewer; 1203 1204 PetscFunctionBegin; 1205 1206 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1207 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1208 1209 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1210 1211 /* loop over matrix elements drawing boxes */ 1212 color = PETSC_DRAW_BLUE; 1213 for (i=0,row=0; i<mbs; i++,row+=bs) { 1214 for (j=a->i[i]; j<a->i[i+1]; j++) { 1215 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1216 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1217 aa = a->a + j*bs2; 1218 for (k=0; k<bs; k++) { 1219 for (l=0; l<bs; l++) { 1220 if (PetscRealPart(*aa++) >= 0.) continue; 1221 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1222 } 1223 } 1224 } 1225 } 1226 color = PETSC_DRAW_CYAN; 1227 for (i=0,row=0; i<mbs; i++,row+=bs) { 1228 for (j=a->i[i]; j<a->i[i+1]; j++) { 1229 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1230 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1231 aa = a->a + j*bs2; 1232 for (k=0; k<bs; k++) { 1233 for (l=0; l<bs; l++) { 1234 if (PetscRealPart(*aa++) != 0.) continue; 1235 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1236 } 1237 } 1238 } 1239 } 1240 1241 color = PETSC_DRAW_RED; 1242 for (i=0,row=0; i<mbs; i++,row+=bs) { 1243 for (j=a->i[i]; j<a->i[i+1]; j++) { 1244 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1245 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1246 aa = a->a + j*bs2; 1247 for (k=0; k<bs; k++) { 1248 for (l=0; l<bs; l++) { 1249 if (PetscRealPart(*aa++) <= 0.) continue; 1250 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1251 } 1252 } 1253 } 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1260 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1261 { 1262 PetscErrorCode ierr; 1263 PetscReal xl,yl,xr,yr,w,h; 1264 PetscDraw draw; 1265 PetscTruth isnull; 1266 1267 PetscFunctionBegin; 1268 1269 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1270 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1271 1272 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1273 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1274 xr += w; yr += h; xl = -w; yl = -h; 1275 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1276 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1277 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatView_SeqBAIJ" 1283 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1284 { 1285 PetscErrorCode ierr; 1286 PetscTruth iascii,isbinary,isdraw; 1287 1288 PetscFunctionBegin; 1289 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1290 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1291 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1292 if (iascii){ 1293 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1294 } else if (isbinary) { 1295 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1296 } else if (isdraw) { 1297 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1298 } else { 1299 Mat B; 1300 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1301 ierr = MatView(B,viewer);CHKERRQ(ierr); 1302 ierr = MatDestroy(B);CHKERRQ(ierr); 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1310 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1311 { 1312 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1313 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1314 PetscInt *ai = a->i,*ailen = a->ilen; 1315 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1316 MatScalar *ap,*aa = a->a,zero = 0.0; 1317 1318 PetscFunctionBegin; 1319 for (k=0; k<m; k++) { /* loop over rows */ 1320 row = im[k]; brow = row/bs; 1321 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1322 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1323 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1324 nrow = ailen[brow]; 1325 for (l=0; l<n; l++) { /* loop over columns */ 1326 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1327 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1328 col = in[l] ; 1329 bcol = col/bs; 1330 cidx = col%bs; 1331 ridx = row%bs; 1332 high = nrow; 1333 low = 0; /* assume unsorted */ 1334 while (high-low > 5) { 1335 t = (low+high)/2; 1336 if (rp[t] > bcol) high = t; 1337 else low = t; 1338 } 1339 for (i=low; i<high; i++) { 1340 if (rp[i] > bcol) break; 1341 if (rp[i] == bcol) { 1342 *v++ = ap[bs2*i+bs*cidx+ridx]; 1343 goto finished; 1344 } 1345 } 1346 *v++ = zero; 1347 finished:; 1348 } 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #if defined(PETSC_USE_MAT_SINGLE) 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1356 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1357 { 1358 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1359 PetscErrorCode ierr; 1360 PetscInt i,N = m*n*b->bs2; 1361 MatScalar *vsingle; 1362 1363 PetscFunctionBegin; 1364 if (N > b->setvalueslen) { 1365 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1366 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1367 b->setvalueslen = N; 1368 } 1369 vsingle = b->setvaluescopy; 1370 for (i=0; i<N; i++) { 1371 vsingle[i] = v[i]; 1372 } 1373 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 #endif 1377 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1381 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1382 { 1383 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1384 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1385 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1386 PetscErrorCode ierr; 1387 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1388 PetscTruth roworiented=a->roworiented; 1389 const MatScalar *value = v; 1390 MatScalar *ap,*aa = a->a,*bap; 1391 1392 PetscFunctionBegin; 1393 if (roworiented) { 1394 stepval = (n-1)*bs; 1395 } else { 1396 stepval = (m-1)*bs; 1397 } 1398 for (k=0; k<m; k++) { /* loop over added rows */ 1399 row = im[k]; 1400 if (row < 0) continue; 1401 #if defined(PETSC_USE_DEBUG) 1402 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1403 #endif 1404 rp = aj + ai[row]; 1405 ap = aa + bs2*ai[row]; 1406 rmax = imax[row]; 1407 nrow = ailen[row]; 1408 low = 0; 1409 high = nrow; 1410 for (l=0; l<n; l++) { /* loop over added columns */ 1411 if (in[l] < 0) continue; 1412 #if defined(PETSC_USE_DEBUG) 1413 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1414 #endif 1415 col = in[l]; 1416 if (roworiented) { 1417 value = v + k*(stepval+bs)*bs + l*bs; 1418 } else { 1419 value = v + l*(stepval+bs)*bs + k*bs; 1420 } 1421 if (col < lastcol) low = 0; else high = nrow; 1422 lastcol = col; 1423 while (high-low > 7) { 1424 t = (low+high)/2; 1425 if (rp[t] > col) high = t; 1426 else low = t; 1427 } 1428 for (i=low; i<high; i++) { 1429 if (rp[i] > col) break; 1430 if (rp[i] == col) { 1431 bap = ap + bs2*i; 1432 if (roworiented) { 1433 if (is == ADD_VALUES) { 1434 for (ii=0; ii<bs; ii++,value+=stepval) { 1435 for (jj=ii; jj<bs2; jj+=bs) { 1436 bap[jj] += *value++; 1437 } 1438 } 1439 } else { 1440 for (ii=0; ii<bs; ii++,value+=stepval) { 1441 for (jj=ii; jj<bs2; jj+=bs) { 1442 bap[jj] = *value++; 1443 } 1444 } 1445 } 1446 } else { 1447 if (is == ADD_VALUES) { 1448 for (ii=0; ii<bs; ii++,value+=stepval) { 1449 for (jj=0; jj<bs; jj++) { 1450 *bap++ += *value++; 1451 } 1452 } 1453 } else { 1454 for (ii=0; ii<bs; ii++,value+=stepval) { 1455 for (jj=0; jj<bs; jj++) { 1456 *bap++ = *value++; 1457 } 1458 } 1459 } 1460 } 1461 goto noinsert2; 1462 } 1463 } 1464 if (nonew == 1) goto noinsert2; 1465 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1466 if (nrow >= rmax) { 1467 /* there is no extra room in row, therefore enlarge */ 1468 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1469 MatScalar *new_a; 1470 1471 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1472 1473 /* malloc new storage space */ 1474 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1475 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1476 new_j = (PetscInt*)(new_a + bs2*new_nz); 1477 new_i = new_j + new_nz; 1478 1479 /* copy over old data into new slots */ 1480 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1481 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1482 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1483 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1484 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1485 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1486 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1487 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1488 /* free up old matrix storage */ 1489 ierr = PetscFree(a->a);CHKERRQ(ierr); 1490 if (!a->singlemalloc) { 1491 ierr = PetscFree(a->i);CHKERRQ(ierr); 1492 ierr = PetscFree(a->j);CHKERRQ(ierr); 1493 } 1494 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1495 a->singlemalloc = PETSC_TRUE; 1496 1497 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1498 rmax = imax[row] = imax[row] + CHUNKSIZE; 1499 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1500 a->maxnz += bs2*CHUNKSIZE; 1501 a->reallocs++; 1502 a->nz++; 1503 } 1504 N = nrow++ - 1; 1505 /* shift up all the later entries in this row */ 1506 for (ii=N; ii>=i; ii--) { 1507 rp[ii+1] = rp[ii]; 1508 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1509 } 1510 if (N >= i) { 1511 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1512 } 1513 rp[i] = col; 1514 bap = ap + bs2*i; 1515 if (roworiented) { 1516 for (ii=0; ii<bs; ii++,value+=stepval) { 1517 for (jj=ii; jj<bs2; jj+=bs) { 1518 bap[jj] = *value++; 1519 } 1520 } 1521 } else { 1522 for (ii=0; ii<bs; ii++,value+=stepval) { 1523 for (jj=0; jj<bs; jj++) { 1524 *bap++ = *value++; 1525 } 1526 } 1527 } 1528 noinsert2:; 1529 low = i; 1530 } 1531 ailen[row] = nrow; 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1538 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1539 { 1540 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1541 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1542 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1543 PetscErrorCode ierr; 1544 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1545 MatScalar *aa = a->a,*ap; 1546 PetscReal ratio=0.6; 1547 1548 PetscFunctionBegin; 1549 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1550 1551 if (m) rmax = ailen[0]; 1552 for (i=1; i<mbs; i++) { 1553 /* move each row back by the amount of empty slots (fshift) before it*/ 1554 fshift += imax[i-1] - ailen[i-1]; 1555 rmax = PetscMax(rmax,ailen[i]); 1556 if (fshift) { 1557 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1558 N = ailen[i]; 1559 for (j=0; j<N; j++) { 1560 ip[j-fshift] = ip[j]; 1561 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1562 } 1563 } 1564 ai[i] = ai[i-1] + ailen[i-1]; 1565 } 1566 if (mbs) { 1567 fshift += imax[mbs-1] - ailen[mbs-1]; 1568 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1569 } 1570 /* reset ilen and imax for each row */ 1571 for (i=0; i<mbs; i++) { 1572 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1573 } 1574 a->nz = ai[mbs]; 1575 1576 /* diagonals may have moved, so kill the diagonal pointers */ 1577 a->idiagvalid = PETSC_FALSE; 1578 if (fshift && a->diag) { 1579 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1580 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1581 a->diag = 0; 1582 } 1583 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1584 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1585 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1586 a->reallocs = 0; 1587 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1588 1589 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1590 if (a->compressedrow.use){ 1591 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1592 } 1593 1594 A->same_nonzero = PETSC_TRUE; 1595 PetscFunctionReturn(0); 1596 } 1597 1598 /* 1599 This function returns an array of flags which indicate the locations of contiguous 1600 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1601 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1602 Assume: sizes should be long enough to hold all the values. 1603 */ 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1606 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1607 { 1608 PetscInt i,j,k,row; 1609 PetscTruth flg; 1610 1611 PetscFunctionBegin; 1612 for (i=0,j=0; i<n; j++) { 1613 row = idx[i]; 1614 if (row%bs!=0) { /* Not the begining of a block */ 1615 sizes[j] = 1; 1616 i++; 1617 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1618 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1619 i++; 1620 } else { /* Begining of the block, so check if the complete block exists */ 1621 flg = PETSC_TRUE; 1622 for (k=1; k<bs; k++) { 1623 if (row+k != idx[i+k]) { /* break in the block */ 1624 flg = PETSC_FALSE; 1625 break; 1626 } 1627 } 1628 if (flg) { /* No break in the bs */ 1629 sizes[j] = bs; 1630 i+= bs; 1631 } else { 1632 sizes[j] = 1; 1633 i++; 1634 } 1635 } 1636 } 1637 *bs_max = j; 1638 PetscFunctionReturn(0); 1639 } 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1643 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1644 { 1645 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1646 PetscErrorCode ierr; 1647 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1648 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1649 PetscScalar zero = 0.0; 1650 MatScalar *aa; 1651 1652 PetscFunctionBegin; 1653 /* Make a copy of the IS and sort it */ 1654 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1655 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1656 1657 /* allocate memory for rows,sizes */ 1658 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1659 sizes = rows + is_n; 1660 1661 /* copy IS values to rows, and sort them */ 1662 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1663 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1664 if (baij->keepzeroedrows) { 1665 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1666 bs_max = is_n; 1667 A->same_nonzero = PETSC_TRUE; 1668 } else { 1669 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1670 A->same_nonzero = PETSC_FALSE; 1671 } 1672 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1673 1674 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1675 row = rows[j]; 1676 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1677 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1678 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1679 if (sizes[i] == bs && !baij->keepzeroedrows) { 1680 if (diag) { 1681 if (baij->ilen[row/bs] > 0) { 1682 baij->ilen[row/bs] = 1; 1683 baij->j[baij->i[row/bs]] = row/bs; 1684 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1685 } 1686 /* Now insert all the diagonal values for this bs */ 1687 for (k=0; k<bs; k++) { 1688 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1689 } 1690 } else { /* (!diag) */ 1691 baij->ilen[row/bs] = 0; 1692 } /* end (!diag) */ 1693 } else { /* (sizes[i] != bs) */ 1694 #if defined (PETSC_USE_DEBUG) 1695 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1696 #endif 1697 for (k=0; k<count; k++) { 1698 aa[0] = zero; 1699 aa += bs; 1700 } 1701 if (diag) { 1702 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1703 } 1704 } 1705 } 1706 1707 ierr = PetscFree(rows);CHKERRQ(ierr); 1708 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1714 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1715 { 1716 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1717 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1718 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1719 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1720 PetscErrorCode ierr; 1721 PetscInt ridx,cidx,bs2=a->bs2; 1722 PetscTruth roworiented=a->roworiented; 1723 MatScalar *ap,value,*aa=a->a,*bap; 1724 1725 PetscFunctionBegin; 1726 for (k=0; k<m; k++) { /* loop over added rows */ 1727 row = im[k]; brow = row/bs; 1728 if (row < 0) continue; 1729 #if defined(PETSC_USE_DEBUG) 1730 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1731 #endif 1732 rp = aj + ai[brow]; 1733 ap = aa + bs2*ai[brow]; 1734 rmax = imax[brow]; 1735 nrow = ailen[brow]; 1736 low = 0; 1737 high = nrow; 1738 for (l=0; l<n; l++) { /* loop over added columns */ 1739 if (in[l] < 0) continue; 1740 #if defined(PETSC_USE_DEBUG) 1741 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1742 #endif 1743 col = in[l]; bcol = col/bs; 1744 ridx = row % bs; cidx = col % bs; 1745 if (roworiented) { 1746 value = v[l + k*n]; 1747 } else { 1748 value = v[k + l*m]; 1749 } 1750 if (col < lastcol) low = 0; else high = nrow; 1751 lastcol = col; 1752 while (high-low > 7) { 1753 t = (low+high)/2; 1754 if (rp[t] > bcol) high = t; 1755 else low = t; 1756 } 1757 for (i=low; i<high; i++) { 1758 if (rp[i] > bcol) break; 1759 if (rp[i] == bcol) { 1760 bap = ap + bs2*i + bs*cidx + ridx; 1761 if (is == ADD_VALUES) *bap += value; 1762 else *bap = value; 1763 goto noinsert1; 1764 } 1765 } 1766 if (nonew == 1) goto noinsert1; 1767 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1768 if (nrow >= rmax) { 1769 /* there is no extra room in row, therefore enlarge */ 1770 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1771 MatScalar *new_a; 1772 1773 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1774 1775 /* Malloc new storage space */ 1776 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1777 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1778 new_j = (PetscInt*)(new_a + bs2*new_nz); 1779 new_i = new_j + new_nz; 1780 1781 /* copy over old data into new slots */ 1782 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1783 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1784 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1785 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1786 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1787 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1788 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1789 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1790 /* free up old matrix storage */ 1791 ierr = PetscFree(a->a);CHKERRQ(ierr); 1792 if (!a->singlemalloc) { 1793 ierr = PetscFree(a->i);CHKERRQ(ierr); 1794 ierr = PetscFree(a->j);CHKERRQ(ierr); 1795 } 1796 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1797 a->singlemalloc = PETSC_TRUE; 1798 1799 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1800 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1801 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1802 a->maxnz += bs2*CHUNKSIZE; 1803 a->reallocs++; 1804 a->nz++; 1805 } 1806 N = nrow++ - 1; 1807 /* shift up all the later entries in this row */ 1808 for (ii=N; ii>=i; ii--) { 1809 rp[ii+1] = rp[ii]; 1810 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1811 } 1812 if (N>=i) { 1813 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1814 } 1815 rp[i] = bcol; 1816 ap[bs2*i + bs*cidx + ridx] = value; 1817 noinsert1:; 1818 low = i; 1819 } 1820 ailen[brow] = nrow; 1821 } 1822 A->same_nonzero = PETSC_FALSE; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1829 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1830 { 1831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1832 Mat outA; 1833 PetscErrorCode ierr; 1834 PetscTruth row_identity,col_identity; 1835 1836 PetscFunctionBegin; 1837 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1838 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1839 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1840 if (!row_identity || !col_identity) { 1841 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1842 } 1843 1844 outA = inA; 1845 inA->factor = FACTOR_LU; 1846 1847 if (!a->diag) { 1848 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1849 } 1850 1851 a->row = row; 1852 a->col = col; 1853 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1854 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1855 1856 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1857 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1858 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1859 1860 /* 1861 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1862 for ILU(0) factorization with natural ordering 1863 */ 1864 if (inA->bs < 8) { 1865 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1866 } else { 1867 if (!a->solve_work) { 1868 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1869 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1870 } 1871 } 1872 1873 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1874 1875 PetscFunctionReturn(0); 1876 } 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1879 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1880 { 1881 static PetscTruth called = PETSC_FALSE; 1882 MPI_Comm comm = A->comm; 1883 PetscErrorCode ierr; 1884 1885 PetscFunctionBegin; 1886 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1887 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1888 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 EXTERN_C_BEGIN 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1895 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1896 { 1897 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1898 PetscInt i,nz,nbs; 1899 1900 PetscFunctionBegin; 1901 nz = baij->maxnz/baij->bs2; 1902 nbs = baij->nbs; 1903 for (i=0; i<nz; i++) { 1904 baij->j[i] = indices[i]; 1905 } 1906 baij->nz = nz; 1907 for (i=0; i<nbs; i++) { 1908 baij->ilen[i] = baij->imax[i]; 1909 } 1910 1911 PetscFunctionReturn(0); 1912 } 1913 EXTERN_C_END 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1917 /*@ 1918 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1919 in the matrix. 1920 1921 Input Parameters: 1922 + mat - the SeqBAIJ matrix 1923 - indices - the column indices 1924 1925 Level: advanced 1926 1927 Notes: 1928 This can be called if you have precomputed the nonzero structure of the 1929 matrix and want to provide it to the matrix object to improve the performance 1930 of the MatSetValues() operation. 1931 1932 You MUST have set the correct numbers of nonzeros per row in the call to 1933 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1934 1935 MUST be called before any calls to MatSetValues(); 1936 1937 @*/ 1938 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1939 { 1940 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1941 1942 PetscFunctionBegin; 1943 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1944 PetscValidPointer(indices,2); 1945 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1946 if (f) { 1947 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1948 } else { 1949 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1950 } 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1956 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1957 { 1958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1959 PetscErrorCode ierr; 1960 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1961 PetscReal atmp; 1962 PetscScalar *x,zero = 0.0; 1963 MatScalar *aa; 1964 PetscInt ncols,brow,krow,kcol; 1965 1966 PetscFunctionBegin; 1967 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1968 bs = A->bs; 1969 aa = a->a; 1970 ai = a->i; 1971 aj = a->j; 1972 mbs = a->mbs; 1973 1974 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1975 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1976 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1977 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1978 for (i=0; i<mbs; i++) { 1979 ncols = ai[1] - ai[0]; ai++; 1980 brow = bs*i; 1981 for (j=0; j<ncols; j++){ 1982 /* bcol = bs*(*aj); */ 1983 for (kcol=0; kcol<bs; kcol++){ 1984 for (krow=0; krow<bs; krow++){ 1985 atmp = PetscAbsScalar(*aa); aa++; 1986 row = brow + krow; /* row index */ 1987 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1988 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1989 } 1990 } 1991 aj++; 1992 } 1993 } 1994 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2000 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2001 { 2002 PetscErrorCode ierr; 2003 2004 PetscFunctionBegin; 2005 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2011 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2012 { 2013 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2014 PetscFunctionBegin; 2015 *array = a->a; 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2021 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2022 { 2023 PetscFunctionBegin; 2024 PetscFunctionReturn(0); 2025 } 2026 2027 #include "petscblaslapack.h" 2028 #undef __FUNCT__ 2029 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2030 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 2031 { 2032 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2033 PetscErrorCode ierr; 2034 PetscInt i,bs=Y->bs,j,bs2; 2035 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 2036 2037 PetscFunctionBegin; 2038 if (str == SAME_NONZERO_PATTERN) { 2039 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2040 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2041 if (y->xtoy && y->XtoY != X) { 2042 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2043 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2044 } 2045 if (!y->xtoy) { /* get xtoy */ 2046 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2047 y->XtoY = X; 2048 } 2049 bs2 = bs*bs; 2050 for (i=0; i<x->nz; i++) { 2051 j = 0; 2052 while (j < bs2){ 2053 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2054 j++; 2055 } 2056 } 2057 ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2058 } else { 2059 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2060 } 2061 PetscFunctionReturn(0); 2062 } 2063 2064 /* -------------------------------------------------------------------*/ 2065 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2066 MatGetRow_SeqBAIJ, 2067 MatRestoreRow_SeqBAIJ, 2068 MatMult_SeqBAIJ_N, 2069 /* 4*/ MatMultAdd_SeqBAIJ_N, 2070 MatMultTranspose_SeqBAIJ, 2071 MatMultTransposeAdd_SeqBAIJ, 2072 MatSolve_SeqBAIJ_N, 2073 0, 2074 0, 2075 /*10*/ 0, 2076 MatLUFactor_SeqBAIJ, 2077 0, 2078 0, 2079 MatTranspose_SeqBAIJ, 2080 /*15*/ MatGetInfo_SeqBAIJ, 2081 MatEqual_SeqBAIJ, 2082 MatGetDiagonal_SeqBAIJ, 2083 MatDiagonalScale_SeqBAIJ, 2084 MatNorm_SeqBAIJ, 2085 /*20*/ 0, 2086 MatAssemblyEnd_SeqBAIJ, 2087 0, 2088 MatSetOption_SeqBAIJ, 2089 MatZeroEntries_SeqBAIJ, 2090 /*25*/ MatZeroRows_SeqBAIJ, 2091 MatLUFactorSymbolic_SeqBAIJ, 2092 MatLUFactorNumeric_SeqBAIJ_N, 2093 MatCholeskyFactorSymbolic_SeqBAIJ, 2094 MatCholeskyFactorNumeric_SeqBAIJ_N, 2095 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2096 MatILUFactorSymbolic_SeqBAIJ, 2097 MatICCFactorSymbolic_SeqBAIJ, 2098 MatGetArray_SeqBAIJ, 2099 MatRestoreArray_SeqBAIJ, 2100 /*35*/ MatDuplicate_SeqBAIJ, 2101 0, 2102 0, 2103 MatILUFactor_SeqBAIJ, 2104 0, 2105 /*40*/ MatAXPY_SeqBAIJ, 2106 MatGetSubMatrices_SeqBAIJ, 2107 MatIncreaseOverlap_SeqBAIJ, 2108 MatGetValues_SeqBAIJ, 2109 0, 2110 /*45*/ MatPrintHelp_SeqBAIJ, 2111 MatScale_SeqBAIJ, 2112 0, 2113 0, 2114 0, 2115 /*50*/ 0, 2116 MatGetRowIJ_SeqBAIJ, 2117 MatRestoreRowIJ_SeqBAIJ, 2118 0, 2119 0, 2120 /*55*/ 0, 2121 0, 2122 0, 2123 0, 2124 MatSetValuesBlocked_SeqBAIJ, 2125 /*60*/ MatGetSubMatrix_SeqBAIJ, 2126 MatDestroy_SeqBAIJ, 2127 MatView_SeqBAIJ, 2128 MatGetPetscMaps_Petsc, 2129 0, 2130 /*65*/ 0, 2131 0, 2132 0, 2133 0, 2134 0, 2135 /*70*/ MatGetRowMax_SeqBAIJ, 2136 MatConvert_Basic, 2137 0, 2138 0, 2139 0, 2140 /*75*/ 0, 2141 0, 2142 0, 2143 0, 2144 0, 2145 /*80*/ 0, 2146 0, 2147 0, 2148 0, 2149 MatLoad_SeqBAIJ, 2150 /*85*/ 0, 2151 0, 2152 0, 2153 0, 2154 0, 2155 /*90*/ 0, 2156 0, 2157 0, 2158 0, 2159 0, 2160 /*95*/ 0, 2161 0, 2162 0, 2163 0}; 2164 2165 EXTERN_C_BEGIN 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2168 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2169 { 2170 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2171 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 if (aij->nonew != 1) { 2176 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2177 } 2178 2179 /* allocate space for values if not already there */ 2180 if (!aij->saved_values) { 2181 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2182 } 2183 2184 /* copy values over */ 2185 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 EXTERN_C_END 2189 2190 EXTERN_C_BEGIN 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2193 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2194 { 2195 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2196 PetscErrorCode ierr; 2197 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2198 2199 PetscFunctionBegin; 2200 if (aij->nonew != 1) { 2201 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2202 } 2203 if (!aij->saved_values) { 2204 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2205 } 2206 2207 /* copy values over */ 2208 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 EXTERN_C_END 2212 2213 EXTERN_C_BEGIN 2214 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 2215 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*); 2216 EXTERN_C_END 2217 2218 EXTERN_C_BEGIN 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2221 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2222 { 2223 Mat_SeqBAIJ *b; 2224 PetscErrorCode ierr; 2225 PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2226 PetscTruth flg; 2227 2228 PetscFunctionBegin; 2229 2230 B->preallocated = PETSC_TRUE; 2231 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2232 if (nnz && newbs != bs) { 2233 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2234 } 2235 bs = newbs; 2236 2237 mbs = B->m/bs; 2238 nbs = B->n/bs; 2239 bs2 = bs*bs; 2240 2241 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2242 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2243 } 2244 2245 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2246 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2247 if (nnz) { 2248 for (i=0; i<mbs; i++) { 2249 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2250 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2251 } 2252 } 2253 2254 b = (Mat_SeqBAIJ*)B->data; 2255 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2256 B->ops->solve = MatSolve_SeqBAIJ_Update; 2257 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2258 if (!flg) { 2259 switch (bs) { 2260 case 1: 2261 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2262 B->ops->mult = MatMult_SeqBAIJ_1; 2263 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2264 break; 2265 case 2: 2266 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2267 B->ops->mult = MatMult_SeqBAIJ_2; 2268 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2269 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2270 break; 2271 case 3: 2272 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2273 B->ops->mult = MatMult_SeqBAIJ_3; 2274 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2275 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2276 break; 2277 case 4: 2278 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2279 B->ops->mult = MatMult_SeqBAIJ_4; 2280 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2281 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2282 break; 2283 case 5: 2284 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2285 B->ops->mult = MatMult_SeqBAIJ_5; 2286 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2287 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2288 break; 2289 case 6: 2290 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2291 B->ops->mult = MatMult_SeqBAIJ_6; 2292 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2293 break; 2294 case 7: 2295 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2296 B->ops->mult = MatMult_SeqBAIJ_7; 2297 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2298 break; 2299 default: 2300 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2301 B->ops->mult = MatMult_SeqBAIJ_N; 2302 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2303 break; 2304 } 2305 } 2306 B->bs = bs; 2307 b->mbs = mbs; 2308 b->nbs = nbs; 2309 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2310 if (!nnz) { 2311 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2312 else if (nz <= 0) nz = 1; 2313 for (i=0; i<mbs; i++) b->imax[i] = nz; 2314 nz = nz*mbs; 2315 } else { 2316 nz = 0; 2317 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2318 } 2319 2320 /* allocate the matrix space */ 2321 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2322 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2323 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2324 b->j = (PetscInt*)(b->a + nz*bs2); 2325 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2326 b->i = b->j + nz; 2327 b->singlemalloc = PETSC_TRUE; 2328 2329 b->i[0] = 0; 2330 for (i=1; i<mbs+1; i++) { 2331 b->i[i] = b->i[i-1] + b->imax[i-1]; 2332 } 2333 2334 /* b->ilen will count nonzeros in each block row so far. */ 2335 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2336 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2337 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2338 2339 B->bs = bs; 2340 b->bs2 = bs2; 2341 b->mbs = mbs; 2342 b->nz = 0; 2343 b->maxnz = nz*bs2; 2344 B->info.nz_unneeded = (PetscReal)b->maxnz; 2345 PetscFunctionReturn(0); 2346 } 2347 EXTERN_C_END 2348 2349 /*MC 2350 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2351 block sparse compressed row format. 2352 2353 Options Database Keys: 2354 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2355 2356 Level: beginner 2357 2358 .seealso: MatCreateSeqBAIJ 2359 M*/ 2360 2361 EXTERN_C_BEGIN 2362 #undef __FUNCT__ 2363 #define __FUNCT__ "MatCreate_SeqBAIJ" 2364 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2365 { 2366 PetscErrorCode ierr; 2367 PetscMPIInt size; 2368 Mat_SeqBAIJ *b; 2369 2370 PetscFunctionBegin; 2371 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2372 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2373 2374 B->m = B->M = PetscMax(B->m,B->M); 2375 B->n = B->N = PetscMax(B->n,B->N); 2376 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2377 B->data = (void*)b; 2378 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2379 B->factor = 0; 2380 B->mapping = 0; 2381 b->row = 0; 2382 b->col = 0; 2383 b->icol = 0; 2384 b->reallocs = 0; 2385 b->saved_values = 0; 2386 #if defined(PETSC_USE_MAT_SINGLE) 2387 b->setvalueslen = 0; 2388 b->setvaluescopy = PETSC_NULL; 2389 #endif 2390 2391 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2392 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2393 2394 b->sorted = PETSC_FALSE; 2395 b->roworiented = PETSC_TRUE; 2396 b->nonew = 0; 2397 b->diag = 0; 2398 b->solve_work = 0; 2399 b->mult_work = 0; 2400 B->spptr = 0; 2401 B->info.nz_unneeded = (PetscReal)b->maxnz; 2402 b->keepzeroedrows = PETSC_FALSE; 2403 b->xtoy = 0; 2404 b->XtoY = 0; 2405 b->compressedrow.use = PETSC_FALSE; 2406 b->compressedrow.nrows = 0; 2407 b->compressedrow.i = PETSC_NULL; 2408 b->compressedrow.rindex = PETSC_NULL; 2409 b->compressedrow.checked = PETSC_FALSE; 2410 B->same_nonzero = PETSC_FALSE; 2411 2412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2413 "MatInvertBlockDiagonal_SeqBAIJ", 2414 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2416 "MatStoreValues_SeqBAIJ", 2417 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2418 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2419 "MatRetrieveValues_SeqBAIJ", 2420 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2421 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2422 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2423 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2424 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2425 "MatConvert_SeqBAIJ_SeqAIJ", 2426 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2427 #if !defined(PETSC_USE_64BIT_INT) 2428 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2429 "MatConvert_SeqBAIJ_SeqSBAIJ", 2430 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2431 #endif 2432 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2433 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2434 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2435 PetscFunctionReturn(0); 2436 } 2437 EXTERN_C_END 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2441 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2442 { 2443 Mat C; 2444 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2445 PetscErrorCode ierr; 2446 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2447 2448 PetscFunctionBegin; 2449 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2450 2451 *B = 0; 2452 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2453 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2454 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2455 c = (Mat_SeqBAIJ*)C->data; 2456 2457 C->M = A->M; 2458 C->N = A->N; 2459 C->bs = A->bs; 2460 c->bs2 = a->bs2; 2461 c->mbs = a->mbs; 2462 c->nbs = a->nbs; 2463 2464 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2465 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2466 for (i=0; i<mbs; i++) { 2467 c->imax[i] = a->imax[i]; 2468 c->ilen[i] = a->ilen[i]; 2469 } 2470 2471 /* allocate the matrix space */ 2472 c->singlemalloc = PETSC_TRUE; 2473 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2474 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2475 c->j = (PetscInt*)(c->a + nz*bs2); 2476 c->i = c->j + nz; 2477 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2478 if (mbs > 0) { 2479 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2480 if (cpvalues == MAT_COPY_VALUES) { 2481 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2482 } else { 2483 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2484 } 2485 } 2486 2487 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2488 c->sorted = a->sorted; 2489 c->roworiented = a->roworiented; 2490 c->nonew = a->nonew; 2491 2492 if (a->diag) { 2493 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2494 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2495 for (i=0; i<mbs; i++) { 2496 c->diag[i] = a->diag[i]; 2497 } 2498 } else c->diag = 0; 2499 c->nz = a->nz; 2500 c->maxnz = a->maxnz; 2501 c->solve_work = 0; 2502 c->mult_work = 0; 2503 C->preallocated = PETSC_TRUE; 2504 C->assembled = PETSC_TRUE; 2505 2506 c->compressedrow.use = a->compressedrow.use; 2507 c->compressedrow.nrows = a->compressedrow.nrows; 2508 c->compressedrow.checked = a->compressedrow.checked; 2509 if ( a->compressedrow.checked && a->compressedrow.use){ 2510 i = a->compressedrow.nrows; 2511 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2512 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2513 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2514 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2515 } else { 2516 c->compressedrow.use = PETSC_FALSE; 2517 c->compressedrow.i = PETSC_NULL; 2518 c->compressedrow.rindex = PETSC_NULL; 2519 } 2520 C->same_nonzero = A->same_nonzero; 2521 *B = C; 2522 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatLoad_SeqBAIJ" 2528 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2529 { 2530 Mat_SeqBAIJ *a; 2531 Mat B; 2532 PetscErrorCode ierr; 2533 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2534 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2535 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2536 PetscInt *masked,nmask,tmp,bs2,ishift; 2537 PetscMPIInt size; 2538 int fd; 2539 PetscScalar *aa; 2540 MPI_Comm comm = ((PetscObject)viewer)->comm; 2541 2542 PetscFunctionBegin; 2543 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2544 bs2 = bs*bs; 2545 2546 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2547 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2548 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2549 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2550 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2551 M = header[1]; N = header[2]; nz = header[3]; 2552 2553 if (header[3] < 0) { 2554 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2555 } 2556 2557 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2558 2559 /* 2560 This code adds extra rows to make sure the number of rows is 2561 divisible by the blocksize 2562 */ 2563 mbs = M/bs; 2564 extra_rows = bs - M + bs*(mbs); 2565 if (extra_rows == bs) extra_rows = 0; 2566 else mbs++; 2567 if (extra_rows) { 2568 ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2569 } 2570 2571 /* read in row lengths */ 2572 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2573 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2574 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2575 2576 /* read in column indices */ 2577 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2578 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2579 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2580 2581 /* loop over row lengths determining block row lengths */ 2582 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2583 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2584 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2585 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2586 masked = mask + mbs; 2587 rowcount = 0; nzcount = 0; 2588 for (i=0; i<mbs; i++) { 2589 nmask = 0; 2590 for (j=0; j<bs; j++) { 2591 kmax = rowlengths[rowcount]; 2592 for (k=0; k<kmax; k++) { 2593 tmp = jj[nzcount++]/bs; 2594 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2595 } 2596 rowcount++; 2597 } 2598 browlengths[i] += nmask; 2599 /* zero out the mask elements we set */ 2600 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2601 } 2602 2603 /* create our matrix */ 2604 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2605 ierr = MatSetType(B,type);CHKERRQ(ierr); 2606 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2607 a = (Mat_SeqBAIJ*)B->data; 2608 2609 /* set matrix "i" values */ 2610 a->i[0] = 0; 2611 for (i=1; i<= mbs; i++) { 2612 a->i[i] = a->i[i-1] + browlengths[i-1]; 2613 a->ilen[i-1] = browlengths[i-1]; 2614 } 2615 a->nz = 0; 2616 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2617 2618 /* read in nonzero values */ 2619 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2620 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2621 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2622 2623 /* set "a" and "j" values into matrix */ 2624 nzcount = 0; jcount = 0; 2625 for (i=0; i<mbs; i++) { 2626 nzcountb = nzcount; 2627 nmask = 0; 2628 for (j=0; j<bs; j++) { 2629 kmax = rowlengths[i*bs+j]; 2630 for (k=0; k<kmax; k++) { 2631 tmp = jj[nzcount++]/bs; 2632 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2633 } 2634 } 2635 /* sort the masked values */ 2636 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2637 2638 /* set "j" values into matrix */ 2639 maskcount = 1; 2640 for (j=0; j<nmask; j++) { 2641 a->j[jcount++] = masked[j]; 2642 mask[masked[j]] = maskcount++; 2643 } 2644 /* set "a" values into matrix */ 2645 ishift = bs2*a->i[i]; 2646 for (j=0; j<bs; j++) { 2647 kmax = rowlengths[i*bs+j]; 2648 for (k=0; k<kmax; k++) { 2649 tmp = jj[nzcountb]/bs ; 2650 block = mask[tmp] - 1; 2651 point = jj[nzcountb] - bs*tmp; 2652 idx = ishift + bs2*block + j + bs*point; 2653 a->a[idx] = (MatScalar)aa[nzcountb++]; 2654 } 2655 } 2656 /* zero out the mask elements we set */ 2657 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2658 } 2659 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2660 2661 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2662 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2663 ierr = PetscFree(aa);CHKERRQ(ierr); 2664 ierr = PetscFree(jj);CHKERRQ(ierr); 2665 ierr = PetscFree(mask);CHKERRQ(ierr); 2666 2667 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2668 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2669 ierr = MatView_Private(B);CHKERRQ(ierr); 2670 2671 *A = B; 2672 PetscFunctionReturn(0); 2673 } 2674 2675 #undef __FUNCT__ 2676 #define __FUNCT__ "MatCreateSeqBAIJ" 2677 /*@C 2678 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2679 compressed row) format. For good matrix assembly performance the 2680 user should preallocate the matrix storage by setting the parameter nz 2681 (or the array nnz). By setting these parameters accurately, performance 2682 during matrix assembly can be increased by more than a factor of 50. 2683 2684 Collective on MPI_Comm 2685 2686 Input Parameters: 2687 + comm - MPI communicator, set to PETSC_COMM_SELF 2688 . bs - size of block 2689 . m - number of rows 2690 . n - number of columns 2691 . nz - number of nonzero blocks per block row (same for all rows) 2692 - nnz - array containing the number of nonzero blocks in the various block rows 2693 (possibly different for each block row) or PETSC_NULL 2694 2695 Output Parameter: 2696 . A - the matrix 2697 2698 Options Database Keys: 2699 . -mat_no_unroll - uses code that does not unroll the loops in the 2700 block calculations (much slower) 2701 . -mat_block_size - size of the blocks to use 2702 2703 Level: intermediate 2704 2705 Notes: 2706 The number of rows and columns must be divisible by blocksize. 2707 2708 If the nnz parameter is given then the nz parameter is ignored 2709 2710 A nonzero block is any block that as 1 or more nonzeros in it 2711 2712 The block AIJ format is fully compatible with standard Fortran 77 2713 storage. That is, the stored row and column indices can begin at 2714 either one (as in Fortran) or zero. See the users' manual for details. 2715 2716 Specify the preallocated storage with either nz or nnz (not both). 2717 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2718 allocation. For additional details, see the users manual chapter on 2719 matrices. 2720 2721 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2722 @*/ 2723 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2724 { 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2729 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2730 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2731 PetscFunctionReturn(0); 2732 } 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2736 /*@C 2737 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2738 per row in the matrix. For good matrix assembly performance the 2739 user should preallocate the matrix storage by setting the parameter nz 2740 (or the array nnz). By setting these parameters accurately, performance 2741 during matrix assembly can be increased by more than a factor of 50. 2742 2743 Collective on MPI_Comm 2744 2745 Input Parameters: 2746 + A - the matrix 2747 . bs - size of block 2748 . nz - number of block nonzeros per block row (same for all rows) 2749 - nnz - array containing the number of block nonzeros in the various block rows 2750 (possibly different for each block row) or PETSC_NULL 2751 2752 Options Database Keys: 2753 . -mat_no_unroll - uses code that does not unroll the loops in the 2754 block calculations (much slower) 2755 . -mat_block_size - size of the blocks to use 2756 2757 Level: intermediate 2758 2759 Notes: 2760 If the nnz parameter is given then the nz parameter is ignored 2761 2762 The block AIJ format is fully compatible with standard Fortran 77 2763 storage. That is, the stored row and column indices can begin at 2764 either one (as in Fortran) or zero. See the users' manual for details. 2765 2766 Specify the preallocated storage with either nz or nnz (not both). 2767 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2768 allocation. For additional details, see the users manual chapter on 2769 matrices. 2770 2771 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2772 @*/ 2773 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2774 { 2775 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2776 2777 PetscFunctionBegin; 2778 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2779 if (f) { 2780 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2781 } 2782 PetscFunctionReturn(0); 2783 } 2784 2785