1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for direct calls from Fortran (Used in PETSc-fun3d) 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval,lastcol = -1; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 590 stepval = (n-1)*4; 591 for (k=0; k<m; k++) { /* loop over added rows */ 592 row = im[k]; 593 rp = aj + ai[row]; 594 ap = aa + 16*ai[row]; 595 nrow = ailen[row]; 596 low = 0; 597 high = nrow; 598 for (l=0; l<n; l++) { /* loop over added columns */ 599 col = in[l]; 600 if (col <= lastcol) low = 0; else high = nrow; 601 lastcol = col; 602 value = v + k*(stepval+4 + l)*4; 603 while (high-low > 7) { 604 t = (low+high)/2; 605 if (rp[t] > col) high = t; 606 else low = t; 607 } 608 for (i=low; i<high; i++) { 609 if (rp[i] > col) break; 610 if (rp[i] == col) { 611 bap = ap + 16*i; 612 for (ii=0; ii<4; ii++,value+=stepval) { 613 for (jj=ii; jj<16; jj+=4) { 614 bap[jj] += *value++; 615 } 616 } 617 goto noinsert2; 618 } 619 } 620 N = nrow++ - 1; 621 high++; /* added new column index thus must search to one higher than before */ 622 /* shift up all the later entries in this row */ 623 for (ii=N; ii>=i; ii--) { 624 rp[ii+1] = rp[ii]; 625 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 626 } 627 if (N >= i) { 628 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 629 } 630 rp[i] = col; 631 bap = ap + 16*i; 632 for (ii=0; ii<4; ii++,value+=stepval) { 633 for (jj=ii; jj<16; jj+=4) { 634 bap[jj] = *value++; 635 } 636 } 637 noinsert2:; 638 low = i; 639 } 640 ailen[row] = nrow; 641 } 642 PetscFunctionReturnVoid(); 643 } 644 EXTERN_C_END 645 646 #if defined(PETSC_HAVE_FORTRAN_CAPS) 647 #define matsetvalues4_ MATSETVALUES4 648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 649 #define matsetvalues4_ matsetvalues4 650 #endif 651 652 EXTERN_C_BEGIN 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatSetValues4_" 655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 656 { 657 Mat A = *AA; 658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 659 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 660 PetscInt *ai=a->i,*ailen=a->ilen; 661 PetscInt *aj=a->j,brow,bcol; 662 PetscInt ridx,cidx,lastcol = -1; 663 MatScalar *ap,value,*aa=a->a,*bap; 664 665 PetscFunctionBegin; 666 for (k=0; k<m; k++) { /* loop over added rows */ 667 row = im[k]; brow = row/4; 668 rp = aj + ai[brow]; 669 ap = aa + 16*ai[brow]; 670 nrow = ailen[brow]; 671 low = 0; 672 high = nrow; 673 for (l=0; l<n; l++) { /* loop over added columns */ 674 col = in[l]; bcol = col/4; 675 ridx = row % 4; cidx = col % 4; 676 value = v[l + k*n]; 677 if (col <= lastcol) low = 0; else high = nrow; 678 lastcol = col; 679 while (high-low > 7) { 680 t = (low+high)/2; 681 if (rp[t] > bcol) high = t; 682 else low = t; 683 } 684 for (i=low; i<high; i++) { 685 if (rp[i] > bcol) break; 686 if (rp[i] == bcol) { 687 bap = ap + 16*i + 4*cidx + ridx; 688 *bap += value; 689 goto noinsert1; 690 } 691 } 692 N = nrow++ - 1; 693 high++; /* added new column thus must search to one higher than before */ 694 /* shift up all the later entries in this row */ 695 for (ii=N; ii>=i; ii--) { 696 rp[ii+1] = rp[ii]; 697 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 698 } 699 if (N>=i) { 700 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 701 } 702 rp[i] = bcol; 703 ap[16*i + 4*cidx + ridx] = value; 704 noinsert1:; 705 low = i; 706 } 707 ailen[brow] = nrow; 708 } 709 PetscFunctionReturnVoid(); 710 } 711 EXTERN_C_END 712 713 /* UGLY, ugly, ugly 714 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 715 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 716 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 717 converts the entries into single precision and then calls ..._MatScalar() to put them 718 into the single precision data structures. 719 */ 720 #if defined(PETSC_USE_MAT_SINGLE) 721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 722 #else 723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 724 #endif 725 726 #define CHUNKSIZE 10 727 728 /* 729 Checks for missing diagonals 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 PetscErrorCode ierr; 737 PetscInt *diag,*jj = a->j,i; 738 739 PetscFunctionBegin; 740 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 741 diag = a->diag; 742 for (i=0; i<a->mbs; i++) { 743 if (jj[diag[i]] != i) { 744 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 745 } 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 753 { 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 755 PetscErrorCode ierr; 756 PetscInt i,j,m = a->mbs; 757 758 PetscFunctionBegin; 759 if (!a->diag) { 760 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 761 } 762 for (i=0; i<m; i++) { 763 a->diag[i] = a->i[i+1]; 764 for (j=a->i[i]; j<a->i[i+1]; j++) { 765 if (a->j[j] == i) { 766 a->diag[i] = j; 767 break; 768 } 769 } 770 } 771 PetscFunctionReturn(0); 772 } 773 774 775 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 776 777 #undef __FUNCT__ 778 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 779 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 780 { 781 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 782 PetscErrorCode ierr; 783 PetscInt n = a->mbs,i; 784 785 PetscFunctionBegin; 786 *nn = n; 787 if (!ia) PetscFunctionReturn(0); 788 if (symmetric) { 789 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 790 } else if (oshift == 1) { 791 /* temporarily add 1 to i and j indices */ 792 PetscInt nz = a->i[n]; 793 for (i=0; i<nz; i++) a->j[i]++; 794 for (i=0; i<n+1; i++) a->i[i]++; 795 *ia = a->i; *ja = a->j; 796 } else { 797 *ia = a->i; *ja = a->j; 798 } 799 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 805 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 806 { 807 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 808 PetscErrorCode ierr; 809 PetscInt i,n = a->mbs; 810 811 PetscFunctionBegin; 812 if (!ia) PetscFunctionReturn(0); 813 if (symmetric) { 814 ierr = PetscFree(*ia);CHKERRQ(ierr); 815 ierr = PetscFree(*ja);CHKERRQ(ierr); 816 } else if (oshift == 1) { 817 PetscInt nz = a->i[n]-1; 818 for (i=0; i<nz; i++) a->j[i]--; 819 for (i=0; i<n+1; i++) a->i[i]--; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatDestroy_SeqBAIJ" 826 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 829 PetscErrorCode ierr; 830 831 PetscFunctionBegin; 832 #if defined(PETSC_USE_LOG) 833 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 834 #endif 835 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 836 if (a->row) { 837 ierr = ISDestroy(a->row);CHKERRQ(ierr); 838 } 839 if (a->col) { 840 ierr = ISDestroy(a->col);CHKERRQ(ierr); 841 } 842 ierr = PetscFree(a->diag);CHKERRQ(ierr); 843 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 844 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 845 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 846 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 847 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 848 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 849 #if defined(PETSC_USE_MAT_SINGLE) 850 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 851 #endif 852 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 853 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 854 855 ierr = PetscFree(a);CHKERRQ(ierr); 856 857 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatSetOption_SeqBAIJ" 869 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 870 { 871 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 872 PetscErrorCode ierr; 873 874 PetscFunctionBegin; 875 switch (op) { 876 case MAT_ROW_ORIENTED: 877 a->roworiented = PETSC_TRUE; 878 break; 879 case MAT_COLUMN_ORIENTED: 880 a->roworiented = PETSC_FALSE; 881 break; 882 case MAT_COLUMNS_SORTED: 883 a->sorted = PETSC_TRUE; 884 break; 885 case MAT_COLUMNS_UNSORTED: 886 a->sorted = PETSC_FALSE; 887 break; 888 case MAT_KEEP_ZEROED_ROWS: 889 a->keepzeroedrows = PETSC_TRUE; 890 break; 891 case MAT_NO_NEW_NONZERO_LOCATIONS: 892 a->nonew = 1; 893 break; 894 case MAT_NEW_NONZERO_LOCATION_ERR: 895 a->nonew = -1; 896 break; 897 case MAT_NEW_NONZERO_ALLOCATION_ERR: 898 a->nonew = -2; 899 break; 900 case MAT_YES_NEW_NONZERO_LOCATIONS: 901 a->nonew = 0; 902 break; 903 case MAT_ROWS_SORTED: 904 case MAT_ROWS_UNSORTED: 905 case MAT_YES_NEW_DIAGONALS: 906 case MAT_IGNORE_OFF_PROC_ENTRIES: 907 case MAT_USE_HASH_TABLE: 908 ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr); 909 break; 910 case MAT_NO_NEW_DIAGONALS: 911 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 912 case MAT_SYMMETRIC: 913 case MAT_STRUCTURALLY_SYMMETRIC: 914 case MAT_NOT_SYMMETRIC: 915 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 916 case MAT_HERMITIAN: 917 case MAT_NOT_HERMITIAN: 918 case MAT_SYMMETRY_ETERNAL: 919 case MAT_NOT_SYMMETRY_ETERNAL: 920 break; 921 default: 922 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 923 } 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatGetRow_SeqBAIJ" 929 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 930 { 931 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 932 PetscErrorCode ierr; 933 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 934 MatScalar *aa,*aa_i; 935 PetscScalar *v_i; 936 937 PetscFunctionBegin; 938 bs = A->rmap.bs; 939 ai = a->i; 940 aj = a->j; 941 aa = a->a; 942 bs2 = a->bs2; 943 944 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 945 946 bn = row/bs; /* Block number */ 947 bp = row % bs; /* Block Position */ 948 M = ai[bn+1] - ai[bn]; 949 *nz = bs*M; 950 951 if (v) { 952 *v = 0; 953 if (*nz) { 954 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 955 for (i=0; i<M; i++) { /* for each block in the block row */ 956 v_i = *v + i*bs; 957 aa_i = aa + bs2*(ai[bn] + i); 958 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 959 } 960 } 961 } 962 963 if (idx) { 964 *idx = 0; 965 if (*nz) { 966 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 967 for (i=0; i<M; i++) { /* for each block in the block row */ 968 idx_i = *idx + i*bs; 969 itmp = bs*aj[ai[bn] + i]; 970 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 971 } 972 } 973 } 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 979 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 980 { 981 PetscErrorCode ierr; 982 983 PetscFunctionBegin; 984 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 985 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatTranspose_SeqBAIJ" 991 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 992 { 993 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 994 Mat C; 995 PetscErrorCode ierr; 996 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 997 PetscInt *rows,*cols,bs2=a->bs2; 998 PetscScalar *array; 999 1000 PetscFunctionBegin; 1001 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1002 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1003 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1004 1005 #if defined(PETSC_USE_MAT_SINGLE) 1006 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1007 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1008 #else 1009 array = a->a; 1010 #endif 1011 1012 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1013 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1014 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1015 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1016 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1017 ierr = PetscFree(col);CHKERRQ(ierr); 1018 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1019 cols = rows + bs; 1020 for (i=0; i<mbs; i++) { 1021 cols[0] = i*bs; 1022 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1023 len = ai[i+1] - ai[i]; 1024 for (j=0; j<len; j++) { 1025 rows[0] = (*aj++)*bs; 1026 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1027 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1028 array += bs2; 1029 } 1030 } 1031 ierr = PetscFree(rows);CHKERRQ(ierr); 1032 #if defined(PETSC_USE_MAT_SINGLE) 1033 ierr = PetscFree(array); 1034 #endif 1035 1036 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1037 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 1039 if (B) { 1040 *B = C; 1041 } else { 1042 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1043 } 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1049 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1050 { 1051 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1052 PetscErrorCode ierr; 1053 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1054 int fd; 1055 PetscScalar *aa; 1056 FILE *file; 1057 1058 PetscFunctionBegin; 1059 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1060 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1061 col_lens[0] = MAT_FILE_COOKIE; 1062 1063 col_lens[1] = A->rmap.N; 1064 col_lens[2] = A->cmap.n; 1065 col_lens[3] = a->nz*bs2; 1066 1067 /* store lengths of each row and write (including header) to file */ 1068 count = 0; 1069 for (i=0; i<a->mbs; i++) { 1070 for (j=0; j<bs; j++) { 1071 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1072 } 1073 } 1074 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1075 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1076 1077 /* store column indices (zero start index) */ 1078 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1079 count = 0; 1080 for (i=0; i<a->mbs; i++) { 1081 for (j=0; j<bs; j++) { 1082 for (k=a->i[i]; k<a->i[i+1]; k++) { 1083 for (l=0; l<bs; l++) { 1084 jj[count++] = bs*a->j[k] + l; 1085 } 1086 } 1087 } 1088 } 1089 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1090 ierr = PetscFree(jj);CHKERRQ(ierr); 1091 1092 /* store nonzero values */ 1093 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1094 count = 0; 1095 for (i=0; i<a->mbs; i++) { 1096 for (j=0; j<bs; j++) { 1097 for (k=a->i[i]; k<a->i[i+1]; k++) { 1098 for (l=0; l<bs; l++) { 1099 aa[count++] = a->a[bs2*k + l*bs + j]; 1100 } 1101 } 1102 } 1103 } 1104 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1105 ierr = PetscFree(aa);CHKERRQ(ierr); 1106 1107 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1108 if (file) { 1109 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1116 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1121 PetscViewerFormat format; 1122 1123 PetscFunctionBegin; 1124 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1125 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1126 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1127 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1128 Mat aij; 1129 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1130 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1131 ierr = MatDestroy(aij);CHKERRQ(ierr); 1132 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1133 PetscFunctionReturn(0); 1134 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1135 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1136 for (i=0; i<a->mbs; i++) { 1137 for (j=0; j<bs; j++) { 1138 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1139 for (k=a->i[i]; k<a->i[i+1]; k++) { 1140 for (l=0; l<bs; l++) { 1141 #if defined(PETSC_USE_COMPLEX) 1142 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1143 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1144 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1145 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1146 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1147 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1148 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1149 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1150 } 1151 #else 1152 if (a->a[bs2*k + l*bs + j] != 0.0) { 1153 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1154 } 1155 #endif 1156 } 1157 } 1158 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1159 } 1160 } 1161 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1162 } else { 1163 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1164 for (i=0; i<a->mbs; i++) { 1165 for (j=0; j<bs; j++) { 1166 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1167 for (k=a->i[i]; k<a->i[i+1]; k++) { 1168 for (l=0; l<bs; l++) { 1169 #if defined(PETSC_USE_COMPLEX) 1170 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1171 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1172 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1173 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1174 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1175 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1176 } else { 1177 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1178 } 1179 #else 1180 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1181 #endif 1182 } 1183 } 1184 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1185 } 1186 } 1187 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1188 } 1189 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1195 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1196 { 1197 Mat A = (Mat) Aa; 1198 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1201 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1202 MatScalar *aa; 1203 PetscViewer viewer; 1204 1205 PetscFunctionBegin; 1206 1207 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1208 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1209 1210 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1211 1212 /* loop over matrix elements drawing boxes */ 1213 color = PETSC_DRAW_BLUE; 1214 for (i=0,row=0; i<mbs; i++,row+=bs) { 1215 for (j=a->i[i]; j<a->i[i+1]; j++) { 1216 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1217 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1218 aa = a->a + j*bs2; 1219 for (k=0; k<bs; k++) { 1220 for (l=0; l<bs; l++) { 1221 if (PetscRealPart(*aa++) >= 0.) continue; 1222 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1223 } 1224 } 1225 } 1226 } 1227 color = PETSC_DRAW_CYAN; 1228 for (i=0,row=0; i<mbs; i++,row+=bs) { 1229 for (j=a->i[i]; j<a->i[i+1]; j++) { 1230 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1231 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1232 aa = a->a + j*bs2; 1233 for (k=0; k<bs; k++) { 1234 for (l=0; l<bs; l++) { 1235 if (PetscRealPart(*aa++) != 0.) continue; 1236 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1237 } 1238 } 1239 } 1240 } 1241 1242 color = PETSC_DRAW_RED; 1243 for (i=0,row=0; i<mbs; i++,row+=bs) { 1244 for (j=a->i[i]; j<a->i[i+1]; j++) { 1245 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1246 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1247 aa = a->a + j*bs2; 1248 for (k=0; k<bs; k++) { 1249 for (l=0; l<bs; l++) { 1250 if (PetscRealPart(*aa++) <= 0.) continue; 1251 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1252 } 1253 } 1254 } 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1261 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1262 { 1263 PetscErrorCode ierr; 1264 PetscReal xl,yl,xr,yr,w,h; 1265 PetscDraw draw; 1266 PetscTruth isnull; 1267 1268 PetscFunctionBegin; 1269 1270 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1271 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1272 1273 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1274 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1275 xr += w; yr += h; xl = -w; yl = -h; 1276 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1277 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1278 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatView_SeqBAIJ" 1284 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1285 { 1286 PetscErrorCode ierr; 1287 PetscTruth iascii,isbinary,isdraw; 1288 1289 PetscFunctionBegin; 1290 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1291 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1292 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1293 if (iascii){ 1294 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1295 } else if (isbinary) { 1296 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1297 } else if (isdraw) { 1298 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1299 } else { 1300 Mat B; 1301 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1302 ierr = MatView(B,viewer);CHKERRQ(ierr); 1303 ierr = MatDestroy(B);CHKERRQ(ierr); 1304 } 1305 PetscFunctionReturn(0); 1306 } 1307 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1311 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1312 { 1313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1314 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1315 PetscInt *ai = a->i,*ailen = a->ilen; 1316 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1317 MatScalar *ap,*aa = a->a,zero = 0.0; 1318 1319 PetscFunctionBegin; 1320 for (k=0; k<m; k++) { /* loop over rows */ 1321 row = im[k]; brow = row/bs; 1322 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1323 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1324 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1325 nrow = ailen[brow]; 1326 for (l=0; l<n; l++) { /* loop over columns */ 1327 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1328 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1329 col = in[l] ; 1330 bcol = col/bs; 1331 cidx = col%bs; 1332 ridx = row%bs; 1333 high = nrow; 1334 low = 0; /* assume unsorted */ 1335 while (high-low > 5) { 1336 t = (low+high)/2; 1337 if (rp[t] > bcol) high = t; 1338 else low = t; 1339 } 1340 for (i=low; i<high; i++) { 1341 if (rp[i] > bcol) break; 1342 if (rp[i] == bcol) { 1343 *v++ = ap[bs2*i+bs*cidx+ridx]; 1344 goto finished; 1345 } 1346 } 1347 *v++ = zero; 1348 finished:; 1349 } 1350 } 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #if defined(PETSC_USE_MAT_SINGLE) 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1357 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1358 { 1359 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1360 PetscErrorCode ierr; 1361 PetscInt i,N = m*n*b->bs2; 1362 MatScalar *vsingle; 1363 1364 PetscFunctionBegin; 1365 if (N > b->setvalueslen) { 1366 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1367 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1368 b->setvalueslen = N; 1369 } 1370 vsingle = b->setvaluescopy; 1371 for (i=0; i<N; i++) { 1372 vsingle[i] = v[i]; 1373 } 1374 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 #endif 1378 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1382 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1383 { 1384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1385 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1386 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1387 PetscErrorCode ierr; 1388 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1389 PetscTruth roworiented=a->roworiented; 1390 const MatScalar *value = v; 1391 MatScalar *ap,*aa = a->a,*bap; 1392 1393 PetscFunctionBegin; 1394 if (roworiented) { 1395 stepval = (n-1)*bs; 1396 } else { 1397 stepval = (m-1)*bs; 1398 } 1399 for (k=0; k<m; k++) { /* loop over added rows */ 1400 row = im[k]; 1401 if (row < 0) continue; 1402 #if defined(PETSC_USE_DEBUG) 1403 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1404 #endif 1405 rp = aj + ai[row]; 1406 ap = aa + bs2*ai[row]; 1407 rmax = imax[row]; 1408 nrow = ailen[row]; 1409 low = 0; 1410 high = nrow; 1411 for (l=0; l<n; l++) { /* loop over added columns */ 1412 if (in[l] < 0) continue; 1413 #if defined(PETSC_USE_DEBUG) 1414 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1415 #endif 1416 col = in[l]; 1417 if (roworiented) { 1418 value = v + k*(stepval+bs)*bs + l*bs; 1419 } else { 1420 value = v + l*(stepval+bs)*bs + k*bs; 1421 } 1422 if (col <= lastcol) low = 0; else high = nrow; 1423 lastcol = col; 1424 while (high-low > 7) { 1425 t = (low+high)/2; 1426 if (rp[t] > col) high = t; 1427 else low = t; 1428 } 1429 for (i=low; i<high; i++) { 1430 if (rp[i] > col) break; 1431 if (rp[i] == col) { 1432 bap = ap + bs2*i; 1433 if (roworiented) { 1434 if (is == ADD_VALUES) { 1435 for (ii=0; ii<bs; ii++,value+=stepval) { 1436 for (jj=ii; jj<bs2; jj+=bs) { 1437 bap[jj] += *value++; 1438 } 1439 } 1440 } else { 1441 for (ii=0; ii<bs; ii++,value+=stepval) { 1442 for (jj=ii; jj<bs2; jj+=bs) { 1443 bap[jj] = *value++; 1444 } 1445 } 1446 } 1447 } else { 1448 if (is == ADD_VALUES) { 1449 for (ii=0; ii<bs; ii++,value+=stepval) { 1450 for (jj=0; jj<bs; jj++) { 1451 *bap++ += *value++; 1452 } 1453 } 1454 } else { 1455 for (ii=0; ii<bs; ii++,value+=stepval) { 1456 for (jj=0; jj<bs; jj++) { 1457 *bap++ = *value++; 1458 } 1459 } 1460 } 1461 } 1462 goto noinsert2; 1463 } 1464 } 1465 if (nonew == 1) goto noinsert2; 1466 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1467 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew); 1468 N = nrow++ - 1; high++; 1469 /* shift up all the later entries in this row */ 1470 for (ii=N; ii>=i; ii--) { 1471 rp[ii+1] = rp[ii]; 1472 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1473 } 1474 if (N >= i) { 1475 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1476 } 1477 rp[i] = col; 1478 bap = ap + bs2*i; 1479 if (roworiented) { 1480 for (ii=0; ii<bs; ii++,value+=stepval) { 1481 for (jj=ii; jj<bs2; jj+=bs) { 1482 bap[jj] = *value++; 1483 } 1484 } 1485 } else { 1486 for (ii=0; ii<bs; ii++,value+=stepval) { 1487 for (jj=0; jj<bs; jj++) { 1488 *bap++ = *value++; 1489 } 1490 } 1491 } 1492 noinsert2:; 1493 low = i; 1494 } 1495 ailen[row] = nrow; 1496 } 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1502 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1503 { 1504 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1505 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1506 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1507 PetscErrorCode ierr; 1508 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1509 MatScalar *aa = a->a,*ap; 1510 PetscReal ratio=0.6; 1511 1512 PetscFunctionBegin; 1513 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1514 1515 if (m) rmax = ailen[0]; 1516 for (i=1; i<mbs; i++) { 1517 /* move each row back by the amount of empty slots (fshift) before it*/ 1518 fshift += imax[i-1] - ailen[i-1]; 1519 rmax = PetscMax(rmax,ailen[i]); 1520 if (fshift) { 1521 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1522 N = ailen[i]; 1523 for (j=0; j<N; j++) { 1524 ip[j-fshift] = ip[j]; 1525 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1526 } 1527 } 1528 ai[i] = ai[i-1] + ailen[i-1]; 1529 } 1530 if (mbs) { 1531 fshift += imax[mbs-1] - ailen[mbs-1]; 1532 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1533 } 1534 /* reset ilen and imax for each row */ 1535 for (i=0; i<mbs; i++) { 1536 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1537 } 1538 a->nz = ai[mbs]; 1539 1540 /* diagonals may have moved, so kill the diagonal pointers */ 1541 a->idiagvalid = PETSC_FALSE; 1542 if (fshift && a->diag) { 1543 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1544 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1545 a->diag = 0; 1546 } 1547 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1548 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1549 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1550 a->reallocs = 0; 1551 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1552 1553 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1554 if (a->compressedrow.use){ 1555 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1556 } 1557 1558 A->same_nonzero = PETSC_TRUE; 1559 PetscFunctionReturn(0); 1560 } 1561 1562 /* 1563 This function returns an array of flags which indicate the locations of contiguous 1564 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1565 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1566 Assume: sizes should be long enough to hold all the values. 1567 */ 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1570 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1571 { 1572 PetscInt i,j,k,row; 1573 PetscTruth flg; 1574 1575 PetscFunctionBegin; 1576 for (i=0,j=0; i<n; j++) { 1577 row = idx[i]; 1578 if (row%bs!=0) { /* Not the begining of a block */ 1579 sizes[j] = 1; 1580 i++; 1581 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1582 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1583 i++; 1584 } else { /* Begining of the block, so check if the complete block exists */ 1585 flg = PETSC_TRUE; 1586 for (k=1; k<bs; k++) { 1587 if (row+k != idx[i+k]) { /* break in the block */ 1588 flg = PETSC_FALSE; 1589 break; 1590 } 1591 } 1592 if (flg) { /* No break in the bs */ 1593 sizes[j] = bs; 1594 i+= bs; 1595 } else { 1596 sizes[j] = 1; 1597 i++; 1598 } 1599 } 1600 } 1601 *bs_max = j; 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1607 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1608 { 1609 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1610 PetscErrorCode ierr; 1611 PetscInt i,j,k,count,*rows; 1612 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1613 PetscScalar zero = 0.0; 1614 MatScalar *aa; 1615 1616 PetscFunctionBegin; 1617 /* Make a copy of the IS and sort it */ 1618 /* allocate memory for rows,sizes */ 1619 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1620 sizes = rows + is_n; 1621 1622 /* copy IS values to rows, and sort them */ 1623 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1624 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1625 if (baij->keepzeroedrows) { 1626 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1627 bs_max = is_n; 1628 A->same_nonzero = PETSC_TRUE; 1629 } else { 1630 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1631 A->same_nonzero = PETSC_FALSE; 1632 } 1633 1634 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1635 row = rows[j]; 1636 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1637 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1638 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1639 if (sizes[i] == bs && !baij->keepzeroedrows) { 1640 if (diag != 0.0) { 1641 if (baij->ilen[row/bs] > 0) { 1642 baij->ilen[row/bs] = 1; 1643 baij->j[baij->i[row/bs]] = row/bs; 1644 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1645 } 1646 /* Now insert all the diagonal values for this bs */ 1647 for (k=0; k<bs; k++) { 1648 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1649 } 1650 } else { /* (diag == 0.0) */ 1651 baij->ilen[row/bs] = 0; 1652 } /* end (diag == 0.0) */ 1653 } else { /* (sizes[i] != bs) */ 1654 #if defined (PETSC_USE_DEBUG) 1655 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1656 #endif 1657 for (k=0; k<count; k++) { 1658 aa[0] = zero; 1659 aa += bs; 1660 } 1661 if (diag != 0.0) { 1662 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1663 } 1664 } 1665 } 1666 1667 ierr = PetscFree(rows);CHKERRQ(ierr); 1668 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1674 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1675 { 1676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1677 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1678 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1679 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1680 PetscErrorCode ierr; 1681 PetscInt ridx,cidx,bs2=a->bs2; 1682 PetscTruth roworiented=a->roworiented; 1683 MatScalar *ap,value,*aa=a->a,*bap; 1684 1685 PetscFunctionBegin; 1686 for (k=0; k<m; k++) { /* loop over added rows */ 1687 row = im[k]; 1688 brow = row/bs; 1689 if (row < 0) continue; 1690 #if defined(PETSC_USE_DEBUG) 1691 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1692 #endif 1693 rp = aj + ai[brow]; 1694 ap = aa + bs2*ai[brow]; 1695 rmax = imax[brow]; 1696 nrow = ailen[brow]; 1697 low = 0; 1698 high = nrow; 1699 for (l=0; l<n; l++) { /* loop over added columns */ 1700 if (in[l] < 0) continue; 1701 #if defined(PETSC_USE_DEBUG) 1702 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1703 #endif 1704 col = in[l]; bcol = col/bs; 1705 ridx = row % bs; cidx = col % bs; 1706 if (roworiented) { 1707 value = v[l + k*n]; 1708 } else { 1709 value = v[k + l*m]; 1710 } 1711 if (col <= lastcol) low = 0; else high = nrow; 1712 lastcol = col; 1713 while (high-low > 7) { 1714 t = (low+high)/2; 1715 if (rp[t] > bcol) high = t; 1716 else low = t; 1717 } 1718 for (i=low; i<high; i++) { 1719 if (rp[i] > bcol) break; 1720 if (rp[i] == bcol) { 1721 bap = ap + bs2*i + bs*cidx + ridx; 1722 if (is == ADD_VALUES) *bap += value; 1723 else *bap = value; 1724 goto noinsert1; 1725 } 1726 } 1727 if (nonew == 1) goto noinsert1; 1728 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1729 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew); 1730 N = nrow++ - 1; high++; 1731 /* shift up all the later entries in this row */ 1732 for (ii=N; ii>=i; ii--) { 1733 rp[ii+1] = rp[ii]; 1734 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1735 } 1736 if (N>=i) { 1737 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1738 } 1739 rp[i] = bcol; 1740 ap[bs2*i + bs*cidx + ridx] = value; 1741 a->nz++; 1742 noinsert1:; 1743 low = i; 1744 } 1745 ailen[brow] = nrow; 1746 } 1747 A->same_nonzero = PETSC_FALSE; 1748 PetscFunctionReturn(0); 1749 } 1750 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1754 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1755 { 1756 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1757 Mat outA; 1758 PetscErrorCode ierr; 1759 PetscTruth row_identity,col_identity; 1760 1761 PetscFunctionBegin; 1762 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1763 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1764 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1765 if (!row_identity || !col_identity) { 1766 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1767 } 1768 1769 outA = inA; 1770 inA->factor = FACTOR_LU; 1771 1772 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1773 1774 a->row = row; 1775 a->col = col; 1776 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1777 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1778 1779 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1780 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1781 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1782 1783 /* 1784 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1785 for ILU(0) factorization with natural ordering 1786 */ 1787 if (inA->rmap.bs < 8) { 1788 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1789 } else { 1790 if (!a->solve_work) { 1791 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1792 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1793 } 1794 } 1795 1796 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1797 1798 PetscFunctionReturn(0); 1799 } 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1802 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1803 { 1804 static PetscTruth called = PETSC_FALSE; 1805 MPI_Comm comm = A->comm; 1806 PetscErrorCode ierr; 1807 1808 PetscFunctionBegin; 1809 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1810 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1811 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 EXTERN_C_BEGIN 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1819 { 1820 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1821 PetscInt i,nz,nbs; 1822 1823 PetscFunctionBegin; 1824 nz = baij->maxnz/baij->bs2; 1825 nbs = baij->nbs; 1826 for (i=0; i<nz; i++) { 1827 baij->j[i] = indices[i]; 1828 } 1829 baij->nz = nz; 1830 for (i=0; i<nbs; i++) { 1831 baij->ilen[i] = baij->imax[i]; 1832 } 1833 1834 PetscFunctionReturn(0); 1835 } 1836 EXTERN_C_END 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1840 /*@ 1841 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1842 in the matrix. 1843 1844 Input Parameters: 1845 + mat - the SeqBAIJ matrix 1846 - indices - the column indices 1847 1848 Level: advanced 1849 1850 Notes: 1851 This can be called if you have precomputed the nonzero structure of the 1852 matrix and want to provide it to the matrix object to improve the performance 1853 of the MatSetValues() operation. 1854 1855 You MUST have set the correct numbers of nonzeros per row in the call to 1856 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1857 1858 MUST be called before any calls to MatSetValues(); 1859 1860 @*/ 1861 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1862 { 1863 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1864 1865 PetscFunctionBegin; 1866 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1867 PetscValidPointer(indices,2); 1868 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1869 if (f) { 1870 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1871 } else { 1872 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1873 } 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1879 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1880 { 1881 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1882 PetscErrorCode ierr; 1883 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1884 PetscReal atmp; 1885 PetscScalar *x,zero = 0.0; 1886 MatScalar *aa; 1887 PetscInt ncols,brow,krow,kcol; 1888 1889 PetscFunctionBegin; 1890 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1891 bs = A->rmap.bs; 1892 aa = a->a; 1893 ai = a->i; 1894 aj = a->j; 1895 mbs = a->mbs; 1896 1897 ierr = VecSet(v,zero);CHKERRQ(ierr); 1898 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1899 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1900 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1901 for (i=0; i<mbs; i++) { 1902 ncols = ai[1] - ai[0]; ai++; 1903 brow = bs*i; 1904 for (j=0; j<ncols; j++){ 1905 /* bcol = bs*(*aj); */ 1906 for (kcol=0; kcol<bs; kcol++){ 1907 for (krow=0; krow<bs; krow++){ 1908 atmp = PetscAbsScalar(*aa); aa++; 1909 row = brow + krow; /* row index */ 1910 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1911 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1912 } 1913 } 1914 aj++; 1915 } 1916 } 1917 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatCopy_SeqBAIJ" 1923 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1924 { 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 /* If the two matrices have the same copy implementation, use fast copy. */ 1929 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1930 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1931 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1932 1933 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1934 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1935 } 1936 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1937 } else { 1938 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1945 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1946 { 1947 PetscErrorCode ierr; 1948 1949 PetscFunctionBegin; 1950 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1956 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1957 { 1958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1959 PetscFunctionBegin; 1960 *array = a->a; 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1966 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1967 { 1968 PetscFunctionBegin; 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #include "petscblaslapack.h" 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1975 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1976 { 1977 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1978 PetscErrorCode ierr; 1979 PetscInt i,bs=Y->rmap.bs,j,bs2; 1980 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1981 1982 PetscFunctionBegin; 1983 if (str == SAME_NONZERO_PATTERN) { 1984 PetscScalar alpha = a; 1985 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1986 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1987 if (y->xtoy && y->XtoY != X) { 1988 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1989 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1990 } 1991 if (!y->xtoy) { /* get xtoy */ 1992 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1993 y->XtoY = X; 1994 } 1995 bs2 = bs*bs; 1996 for (i=0; i<x->nz; i++) { 1997 j = 0; 1998 while (j < bs2){ 1999 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2000 j++; 2001 } 2002 } 2003 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2004 } else { 2005 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2006 } 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2012 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2013 { 2014 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2015 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2016 PetscScalar *aa = a->a; 2017 2018 PetscFunctionBegin; 2019 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2025 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2026 { 2027 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2028 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2029 PetscScalar *aa = a->a; 2030 2031 PetscFunctionBegin; 2032 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 2037 /* -------------------------------------------------------------------*/ 2038 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2039 MatGetRow_SeqBAIJ, 2040 MatRestoreRow_SeqBAIJ, 2041 MatMult_SeqBAIJ_N, 2042 /* 4*/ MatMultAdd_SeqBAIJ_N, 2043 MatMultTranspose_SeqBAIJ, 2044 MatMultTransposeAdd_SeqBAIJ, 2045 MatSolve_SeqBAIJ_N, 2046 0, 2047 0, 2048 /*10*/ 0, 2049 MatLUFactor_SeqBAIJ, 2050 0, 2051 0, 2052 MatTranspose_SeqBAIJ, 2053 /*15*/ MatGetInfo_SeqBAIJ, 2054 MatEqual_SeqBAIJ, 2055 MatGetDiagonal_SeqBAIJ, 2056 MatDiagonalScale_SeqBAIJ, 2057 MatNorm_SeqBAIJ, 2058 /*20*/ 0, 2059 MatAssemblyEnd_SeqBAIJ, 2060 0, 2061 MatSetOption_SeqBAIJ, 2062 MatZeroEntries_SeqBAIJ, 2063 /*25*/ MatZeroRows_SeqBAIJ, 2064 MatLUFactorSymbolic_SeqBAIJ, 2065 MatLUFactorNumeric_SeqBAIJ_N, 2066 MatCholeskyFactorSymbolic_SeqBAIJ, 2067 MatCholeskyFactorNumeric_SeqBAIJ_N, 2068 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2069 MatILUFactorSymbolic_SeqBAIJ, 2070 MatICCFactorSymbolic_SeqBAIJ, 2071 MatGetArray_SeqBAIJ, 2072 MatRestoreArray_SeqBAIJ, 2073 /*35*/ MatDuplicate_SeqBAIJ, 2074 0, 2075 0, 2076 MatILUFactor_SeqBAIJ, 2077 0, 2078 /*40*/ MatAXPY_SeqBAIJ, 2079 MatGetSubMatrices_SeqBAIJ, 2080 MatIncreaseOverlap_SeqBAIJ, 2081 MatGetValues_SeqBAIJ, 2082 MatCopy_SeqBAIJ, 2083 /*45*/ MatPrintHelp_SeqBAIJ, 2084 MatScale_SeqBAIJ, 2085 0, 2086 0, 2087 0, 2088 /*50*/ 0, 2089 MatGetRowIJ_SeqBAIJ, 2090 MatRestoreRowIJ_SeqBAIJ, 2091 0, 2092 0, 2093 /*55*/ 0, 2094 0, 2095 0, 2096 0, 2097 MatSetValuesBlocked_SeqBAIJ, 2098 /*60*/ MatGetSubMatrix_SeqBAIJ, 2099 MatDestroy_SeqBAIJ, 2100 MatView_SeqBAIJ, 2101 0, 2102 0, 2103 /*65*/ 0, 2104 0, 2105 0, 2106 0, 2107 0, 2108 /*70*/ MatGetRowMax_SeqBAIJ, 2109 MatConvert_Basic, 2110 0, 2111 0, 2112 0, 2113 /*75*/ 0, 2114 0, 2115 0, 2116 0, 2117 0, 2118 /*80*/ 0, 2119 0, 2120 0, 2121 0, 2122 MatLoad_SeqBAIJ, 2123 /*85*/ 0, 2124 0, 2125 0, 2126 0, 2127 0, 2128 /*90*/ 0, 2129 0, 2130 0, 2131 0, 2132 0, 2133 /*95*/ 0, 2134 0, 2135 0, 2136 0, 2137 0, 2138 /*100*/0, 2139 0, 2140 0, 2141 0, 2142 0, 2143 /*105*/0, 2144 MatRealPart_SeqBAIJ, 2145 MatImaginaryPart_SeqBAIJ 2146 }; 2147 2148 EXTERN_C_BEGIN 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2151 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2152 { 2153 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2154 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2155 PetscErrorCode ierr; 2156 2157 PetscFunctionBegin; 2158 if (aij->nonew != 1) { 2159 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2160 } 2161 2162 /* allocate space for values if not already there */ 2163 if (!aij->saved_values) { 2164 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2165 } 2166 2167 /* copy values over */ 2168 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 EXTERN_C_END 2172 2173 EXTERN_C_BEGIN 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2176 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2177 { 2178 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2179 PetscErrorCode ierr; 2180 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2181 2182 PetscFunctionBegin; 2183 if (aij->nonew != 1) { 2184 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2185 } 2186 if (!aij->saved_values) { 2187 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2188 } 2189 2190 /* copy values over */ 2191 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2192 PetscFunctionReturn(0); 2193 } 2194 EXTERN_C_END 2195 2196 EXTERN_C_BEGIN 2197 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2198 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2199 EXTERN_C_END 2200 2201 EXTERN_C_BEGIN 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2204 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2205 { 2206 Mat_SeqBAIJ *b; 2207 PetscErrorCode ierr; 2208 PetscInt i,mbs,nbs,bs2,newbs = bs; 2209 PetscTruth flg,skipallocation = PETSC_FALSE; 2210 2211 PetscFunctionBegin; 2212 2213 if (nz == MAT_SKIP_ALLOCATION) { 2214 skipallocation = PETSC_TRUE; 2215 nz = 0; 2216 } 2217 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2218 if (nnz && newbs != bs) { 2219 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2220 } 2221 bs = newbs; 2222 2223 B->rmap.bs = B->cmap.bs = bs; 2224 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2225 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2226 2227 B->preallocated = PETSC_TRUE; 2228 2229 mbs = B->rmap.n/bs; 2230 nbs = B->cmap.n/bs; 2231 bs2 = bs*bs; 2232 2233 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2234 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2235 } 2236 2237 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2238 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2239 if (nnz) { 2240 for (i=0; i<mbs; i++) { 2241 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2242 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2243 } 2244 } 2245 2246 b = (Mat_SeqBAIJ*)B->data; 2247 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2248 B->ops->solve = MatSolve_SeqBAIJ_Update; 2249 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2250 if (!flg) { 2251 switch (bs) { 2252 case 1: 2253 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2254 B->ops->mult = MatMult_SeqBAIJ_1; 2255 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2256 break; 2257 case 2: 2258 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2259 B->ops->mult = MatMult_SeqBAIJ_2; 2260 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2261 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2262 break; 2263 case 3: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2265 B->ops->mult = MatMult_SeqBAIJ_3; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2267 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2268 break; 2269 case 4: 2270 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2271 B->ops->mult = MatMult_SeqBAIJ_4; 2272 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2273 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2274 break; 2275 case 5: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2277 B->ops->mult = MatMult_SeqBAIJ_5; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2279 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2280 break; 2281 case 6: 2282 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2283 B->ops->mult = MatMult_SeqBAIJ_6; 2284 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2285 break; 2286 case 7: 2287 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2288 B->ops->mult = MatMult_SeqBAIJ_7; 2289 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2290 break; 2291 default: 2292 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2293 B->ops->mult = MatMult_SeqBAIJ_N; 2294 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2295 break; 2296 } 2297 } 2298 B->rmap.bs = bs; 2299 b->mbs = mbs; 2300 b->nbs = nbs; 2301 if (!skipallocation) { 2302 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2303 /* b->ilen will count nonzeros in each block row so far. */ 2304 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2305 if (!nnz) { 2306 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2307 else if (nz <= 0) nz = 1; 2308 for (i=0; i<mbs; i++) b->imax[i] = nz; 2309 nz = nz*mbs; 2310 } else { 2311 nz = 0; 2312 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2313 } 2314 2315 /* allocate the matrix space */ 2316 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2317 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2318 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2319 b->singlemalloc = PETSC_TRUE; 2320 2321 b->i[0] = 0; 2322 for (i=1; i<mbs+1; i++) { 2323 b->i[i] = b->i[i-1] + b->imax[i-1]; 2324 } 2325 b->free_a = PETSC_TRUE; 2326 b->free_ij = PETSC_TRUE; 2327 } else { 2328 b->free_a = PETSC_FALSE; 2329 b->free_ij = PETSC_FALSE; 2330 } 2331 2332 B->rmap.bs = bs; 2333 b->bs2 = bs2; 2334 b->mbs = mbs; 2335 b->nz = 0; 2336 b->maxnz = nz*bs2; 2337 B->info.nz_unneeded = (PetscReal)b->maxnz; 2338 PetscFunctionReturn(0); 2339 } 2340 EXTERN_C_END 2341 2342 /*MC 2343 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2344 block sparse compressed row format. 2345 2346 Options Database Keys: 2347 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2348 2349 Level: beginner 2350 2351 .seealso: MatCreateSeqBAIJ() 2352 M*/ 2353 2354 EXTERN_C_BEGIN 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatCreate_SeqBAIJ" 2357 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2358 { 2359 PetscErrorCode ierr; 2360 PetscMPIInt size; 2361 Mat_SeqBAIJ *b; 2362 2363 PetscFunctionBegin; 2364 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2365 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2366 2367 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2368 B->data = (void*)b; 2369 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2370 B->factor = 0; 2371 B->mapping = 0; 2372 b->row = 0; 2373 b->col = 0; 2374 b->icol = 0; 2375 b->reallocs = 0; 2376 b->saved_values = 0; 2377 #if defined(PETSC_USE_MAT_SINGLE) 2378 b->setvalueslen = 0; 2379 b->setvaluescopy = PETSC_NULL; 2380 #endif 2381 2382 b->sorted = PETSC_FALSE; 2383 b->roworiented = PETSC_TRUE; 2384 b->nonew = 0; 2385 b->diag = 0; 2386 b->solve_work = 0; 2387 b->mult_work = 0; 2388 B->spptr = 0; 2389 B->info.nz_unneeded = (PetscReal)b->maxnz; 2390 b->keepzeroedrows = PETSC_FALSE; 2391 b->xtoy = 0; 2392 b->XtoY = 0; 2393 b->compressedrow.use = PETSC_FALSE; 2394 b->compressedrow.nrows = 0; 2395 b->compressedrow.i = PETSC_NULL; 2396 b->compressedrow.rindex = PETSC_NULL; 2397 b->compressedrow.checked = PETSC_FALSE; 2398 B->same_nonzero = PETSC_FALSE; 2399 2400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2401 "MatInvertBlockDiagonal_SeqBAIJ", 2402 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2404 "MatStoreValues_SeqBAIJ", 2405 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2407 "MatRetrieveValues_SeqBAIJ", 2408 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2410 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2411 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2413 "MatConvert_SeqBAIJ_SeqAIJ", 2414 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2416 "MatConvert_SeqBAIJ_SeqSBAIJ", 2417 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2418 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2419 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2420 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 EXTERN_C_END 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2427 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2428 { 2429 Mat C; 2430 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2431 PetscErrorCode ierr; 2432 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2433 2434 PetscFunctionBegin; 2435 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2436 2437 *B = 0; 2438 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2439 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2440 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2441 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2442 c = (Mat_SeqBAIJ*)C->data; 2443 2444 C->rmap.N = A->rmap.N; 2445 C->cmap.N = A->cmap.N; 2446 C->rmap.bs = A->rmap.bs; 2447 c->bs2 = a->bs2; 2448 c->mbs = a->mbs; 2449 c->nbs = a->nbs; 2450 2451 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2452 for (i=0; i<mbs; i++) { 2453 c->imax[i] = a->imax[i]; 2454 c->ilen[i] = a->ilen[i]; 2455 } 2456 2457 /* allocate the matrix space */ 2458 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2459 c->singlemalloc = PETSC_TRUE; 2460 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2461 if (mbs > 0) { 2462 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2463 if (cpvalues == MAT_COPY_VALUES) { 2464 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2465 } else { 2466 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2467 } 2468 } 2469 c->sorted = a->sorted; 2470 c->roworiented = a->roworiented; 2471 c->nonew = a->nonew; 2472 2473 if (a->diag) { 2474 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2475 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2476 for (i=0; i<mbs; i++) { 2477 c->diag[i] = a->diag[i]; 2478 } 2479 } else c->diag = 0; 2480 c->nz = a->nz; 2481 c->maxnz = a->maxnz; 2482 c->solve_work = 0; 2483 c->mult_work = 0; 2484 c->free_a = PETSC_TRUE; 2485 c->free_ij = PETSC_TRUE; 2486 C->preallocated = PETSC_TRUE; 2487 C->assembled = PETSC_TRUE; 2488 2489 c->compressedrow.use = a->compressedrow.use; 2490 c->compressedrow.nrows = a->compressedrow.nrows; 2491 c->compressedrow.checked = a->compressedrow.checked; 2492 if ( a->compressedrow.checked && a->compressedrow.use){ 2493 i = a->compressedrow.nrows; 2494 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2495 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2496 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2497 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2498 } else { 2499 c->compressedrow.use = PETSC_FALSE; 2500 c->compressedrow.i = PETSC_NULL; 2501 c->compressedrow.rindex = PETSC_NULL; 2502 } 2503 C->same_nonzero = A->same_nonzero; 2504 *B = C; 2505 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatLoad_SeqBAIJ" 2511 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2512 { 2513 Mat_SeqBAIJ *a; 2514 Mat B; 2515 PetscErrorCode ierr; 2516 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2517 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2518 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2519 PetscInt *masked,nmask,tmp,bs2,ishift; 2520 PetscMPIInt size; 2521 int fd; 2522 PetscScalar *aa; 2523 MPI_Comm comm = ((PetscObject)viewer)->comm; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2527 bs2 = bs*bs; 2528 2529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2530 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2531 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2532 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2533 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2534 M = header[1]; N = header[2]; nz = header[3]; 2535 2536 if (header[3] < 0) { 2537 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2538 } 2539 2540 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2541 2542 /* 2543 This code adds extra rows to make sure the number of rows is 2544 divisible by the blocksize 2545 */ 2546 mbs = M/bs; 2547 extra_rows = bs - M + bs*(mbs); 2548 if (extra_rows == bs) extra_rows = 0; 2549 else mbs++; 2550 if (extra_rows) { 2551 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2552 } 2553 2554 /* read in row lengths */ 2555 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2556 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2557 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2558 2559 /* read in column indices */ 2560 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2561 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2562 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2563 2564 /* loop over row lengths determining block row lengths */ 2565 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2566 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2567 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2568 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2569 masked = mask + mbs; 2570 rowcount = 0; nzcount = 0; 2571 for (i=0; i<mbs; i++) { 2572 nmask = 0; 2573 for (j=0; j<bs; j++) { 2574 kmax = rowlengths[rowcount]; 2575 for (k=0; k<kmax; k++) { 2576 tmp = jj[nzcount++]/bs; 2577 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2578 } 2579 rowcount++; 2580 } 2581 browlengths[i] += nmask; 2582 /* zero out the mask elements we set */ 2583 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2584 } 2585 2586 /* create our matrix */ 2587 ierr = MatCreate(comm,&B); 2588 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2589 ierr = MatSetType(B,type);CHKERRQ(ierr); 2590 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2591 a = (Mat_SeqBAIJ*)B->data; 2592 2593 /* set matrix "i" values */ 2594 a->i[0] = 0; 2595 for (i=1; i<= mbs; i++) { 2596 a->i[i] = a->i[i-1] + browlengths[i-1]; 2597 a->ilen[i-1] = browlengths[i-1]; 2598 } 2599 a->nz = 0; 2600 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2601 2602 /* read in nonzero values */ 2603 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2604 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2605 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2606 2607 /* set "a" and "j" values into matrix */ 2608 nzcount = 0; jcount = 0; 2609 for (i=0; i<mbs; i++) { 2610 nzcountb = nzcount; 2611 nmask = 0; 2612 for (j=0; j<bs; j++) { 2613 kmax = rowlengths[i*bs+j]; 2614 for (k=0; k<kmax; k++) { 2615 tmp = jj[nzcount++]/bs; 2616 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2617 } 2618 } 2619 /* sort the masked values */ 2620 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2621 2622 /* set "j" values into matrix */ 2623 maskcount = 1; 2624 for (j=0; j<nmask; j++) { 2625 a->j[jcount++] = masked[j]; 2626 mask[masked[j]] = maskcount++; 2627 } 2628 /* set "a" values into matrix */ 2629 ishift = bs2*a->i[i]; 2630 for (j=0; j<bs; j++) { 2631 kmax = rowlengths[i*bs+j]; 2632 for (k=0; k<kmax; k++) { 2633 tmp = jj[nzcountb]/bs ; 2634 block = mask[tmp] - 1; 2635 point = jj[nzcountb] - bs*tmp; 2636 idx = ishift + bs2*block + j + bs*point; 2637 a->a[idx] = (MatScalar)aa[nzcountb++]; 2638 } 2639 } 2640 /* zero out the mask elements we set */ 2641 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2642 } 2643 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2644 2645 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2646 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2647 ierr = PetscFree(aa);CHKERRQ(ierr); 2648 ierr = PetscFree(jj);CHKERRQ(ierr); 2649 ierr = PetscFree(mask);CHKERRQ(ierr); 2650 2651 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2652 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2653 ierr = MatView_Private(B);CHKERRQ(ierr); 2654 2655 *A = B; 2656 PetscFunctionReturn(0); 2657 } 2658 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatCreateSeqBAIJ" 2661 /*@C 2662 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2663 compressed row) format. For good matrix assembly performance the 2664 user should preallocate the matrix storage by setting the parameter nz 2665 (or the array nnz). By setting these parameters accurately, performance 2666 during matrix assembly can be increased by more than a factor of 50. 2667 2668 Collective on MPI_Comm 2669 2670 Input Parameters: 2671 + comm - MPI communicator, set to PETSC_COMM_SELF 2672 . bs - size of block 2673 . m - number of rows 2674 . n - number of columns 2675 . nz - number of nonzero blocks per block row (same for all rows) 2676 - nnz - array containing the number of nonzero blocks in the various block rows 2677 (possibly different for each block row) or PETSC_NULL 2678 2679 Output Parameter: 2680 . A - the matrix 2681 2682 Options Database Keys: 2683 . -mat_no_unroll - uses code that does not unroll the loops in the 2684 block calculations (much slower) 2685 . -mat_block_size - size of the blocks to use 2686 2687 Level: intermediate 2688 2689 Notes: 2690 The number of rows and columns must be divisible by blocksize. 2691 2692 If the nnz parameter is given then the nz parameter is ignored 2693 2694 A nonzero block is any block that as 1 or more nonzeros in it 2695 2696 The block AIJ format is fully compatible with standard Fortran 77 2697 storage. That is, the stored row and column indices can begin at 2698 either one (as in Fortran) or zero. See the users' manual for details. 2699 2700 Specify the preallocated storage with either nz or nnz (not both). 2701 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2702 allocation. For additional details, see the users manual chapter on 2703 matrices. 2704 2705 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2706 @*/ 2707 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2708 { 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2713 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2714 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2715 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2716 PetscFunctionReturn(0); 2717 } 2718 2719 #undef __FUNCT__ 2720 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2721 /*@C 2722 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2723 per row in the matrix. For good matrix assembly performance the 2724 user should preallocate the matrix storage by setting the parameter nz 2725 (or the array nnz). By setting these parameters accurately, performance 2726 during matrix assembly can be increased by more than a factor of 50. 2727 2728 Collective on MPI_Comm 2729 2730 Input Parameters: 2731 + A - the matrix 2732 . bs - size of block 2733 . nz - number of block nonzeros per block row (same for all rows) 2734 - nnz - array containing the number of block nonzeros in the various block rows 2735 (possibly different for each block row) or PETSC_NULL 2736 2737 Options Database Keys: 2738 . -mat_no_unroll - uses code that does not unroll the loops in the 2739 block calculations (much slower) 2740 . -mat_block_size - size of the blocks to use 2741 2742 Level: intermediate 2743 2744 Notes: 2745 If the nnz parameter is given then the nz parameter is ignored 2746 2747 The block AIJ format is fully compatible with standard Fortran 77 2748 storage. That is, the stored row and column indices can begin at 2749 either one (as in Fortran) or zero. See the users' manual for details. 2750 2751 Specify the preallocated storage with either nz or nnz (not both). 2752 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2753 allocation. For additional details, see the users manual chapter on 2754 matrices. 2755 2756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2757 @*/ 2758 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2759 { 2760 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2761 2762 PetscFunctionBegin; 2763 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2764 if (f) { 2765 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2766 } 2767 PetscFunctionReturn(0); 2768 } 2769 2770 #undef __FUNCT__ 2771 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2772 /*@ 2773 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2774 (upper triangular entries in CSR format) provided by the user. 2775 2776 Collective on MPI_Comm 2777 2778 Input Parameters: 2779 + comm - must be an MPI communicator of size 1 2780 . bs - size of block 2781 . m - number of rows 2782 . n - number of columns 2783 . i - row indices 2784 . j - column indices 2785 - a - matrix values 2786 2787 Output Parameter: 2788 . mat - the matrix 2789 2790 Level: intermediate 2791 2792 Notes: 2793 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2794 once the matrix is destroyed 2795 2796 You cannot set new nonzero locations into this matrix, that will generate an error. 2797 2798 The i and j indices are 0 based 2799 2800 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2801 2802 @*/ 2803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2804 { 2805 PetscErrorCode ierr; 2806 PetscInt ii; 2807 Mat_SeqBAIJ *baij; 2808 2809 PetscFunctionBegin; 2810 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2811 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2812 2813 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2814 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2815 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2816 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2817 baij = (Mat_SeqBAIJ*)(*mat)->data; 2818 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2819 2820 baij->i = i; 2821 baij->j = j; 2822 baij->a = a; 2823 baij->singlemalloc = PETSC_FALSE; 2824 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2825 baij->free_a = PETSC_FALSE; 2826 baij->free_ij = PETSC_FALSE; 2827 2828 for (ii=0; ii<m; ii++) { 2829 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2830 #if defined(PETSC_USE_DEBUG) 2831 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2832 #endif 2833 } 2834 #if defined(PETSC_USE_DEBUG) 2835 for (ii=0; ii<baij->i[m]; ii++) { 2836 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2837 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2838 } 2839 #endif 2840 2841 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2842 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2843 PetscFunctionReturn(0); 2844 } 2845