1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for direct calls from Fortran (Used in PETSc-fun3d) 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval,lastcol = -1; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 590 stepval = (n-1)*4; 591 for (k=0; k<m; k++) { /* loop over added rows */ 592 row = im[k]; 593 rp = aj + ai[row]; 594 ap = aa + 16*ai[row]; 595 nrow = ailen[row]; 596 low = 0; 597 high = nrow; 598 for (l=0; l<n; l++) { /* loop over added columns */ 599 col = in[l]; 600 if (col <= lastcol) low = 0; else high = nrow; 601 lastcol = col; 602 value = v + k*(stepval+4 + l)*4; 603 while (high-low > 7) { 604 t = (low+high)/2; 605 if (rp[t] > col) high = t; 606 else low = t; 607 } 608 for (i=low; i<high; i++) { 609 if (rp[i] > col) break; 610 if (rp[i] == col) { 611 bap = ap + 16*i; 612 for (ii=0; ii<4; ii++,value+=stepval) { 613 for (jj=ii; jj<16; jj+=4) { 614 bap[jj] += *value++; 615 } 616 } 617 goto noinsert2; 618 } 619 } 620 N = nrow++ - 1; 621 high++; /* added new column index thus must search to one higher than before */ 622 /* shift up all the later entries in this row */ 623 for (ii=N; ii>=i; ii--) { 624 rp[ii+1] = rp[ii]; 625 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 626 } 627 if (N >= i) { 628 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 629 } 630 rp[i] = col; 631 bap = ap + 16*i; 632 for (ii=0; ii<4; ii++,value+=stepval) { 633 for (jj=ii; jj<16; jj+=4) { 634 bap[jj] = *value++; 635 } 636 } 637 noinsert2:; 638 low = i; 639 } 640 ailen[row] = nrow; 641 } 642 PetscFunctionReturnVoid(); 643 } 644 EXTERN_C_END 645 646 #if defined(PETSC_HAVE_FORTRAN_CAPS) 647 #define matsetvalues4_ MATSETVALUES4 648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 649 #define matsetvalues4_ matsetvalues4 650 #endif 651 652 EXTERN_C_BEGIN 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatSetValues4_" 655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 656 { 657 Mat A = *AA; 658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 659 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 660 PetscInt *ai=a->i,*ailen=a->ilen; 661 PetscInt *aj=a->j,brow,bcol; 662 PetscInt ridx,cidx,lastcol = -1; 663 MatScalar *ap,value,*aa=a->a,*bap; 664 665 PetscFunctionBegin; 666 for (k=0; k<m; k++) { /* loop over added rows */ 667 row = im[k]; brow = row/4; 668 rp = aj + ai[brow]; 669 ap = aa + 16*ai[brow]; 670 nrow = ailen[brow]; 671 low = 0; 672 high = nrow; 673 for (l=0; l<n; l++) { /* loop over added columns */ 674 col = in[l]; bcol = col/4; 675 ridx = row % 4; cidx = col % 4; 676 value = v[l + k*n]; 677 if (col <= lastcol) low = 0; else high = nrow; 678 lastcol = col; 679 while (high-low > 7) { 680 t = (low+high)/2; 681 if (rp[t] > bcol) high = t; 682 else low = t; 683 } 684 for (i=low; i<high; i++) { 685 if (rp[i] > bcol) break; 686 if (rp[i] == bcol) { 687 bap = ap + 16*i + 4*cidx + ridx; 688 *bap += value; 689 goto noinsert1; 690 } 691 } 692 N = nrow++ - 1; 693 high++; /* added new column thus must search to one higher than before */ 694 /* shift up all the later entries in this row */ 695 for (ii=N; ii>=i; ii--) { 696 rp[ii+1] = rp[ii]; 697 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 698 } 699 if (N>=i) { 700 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 701 } 702 rp[i] = bcol; 703 ap[16*i + 4*cidx + ridx] = value; 704 noinsert1:; 705 low = i; 706 } 707 ailen[brow] = nrow; 708 } 709 PetscFunctionReturnVoid(); 710 } 711 EXTERN_C_END 712 713 /* UGLY, ugly, ugly 714 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 715 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 716 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 717 converts the entries into single precision and then calls ..._MatScalar() to put them 718 into the single precision data structures. 719 */ 720 #if defined(PETSC_USE_MAT_SINGLE) 721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 722 #else 723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 724 #endif 725 726 #define CHUNKSIZE 10 727 728 /* 729 Checks for missing diagonals 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 PetscErrorCode ierr; 737 PetscInt *diag,*jj = a->j,i; 738 739 PetscFunctionBegin; 740 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 741 diag = a->diag; 742 for (i=0; i<a->mbs; i++) { 743 if (jj[diag[i]] != i) { 744 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 745 } 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 753 { 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 755 PetscErrorCode ierr; 756 PetscInt i,j,m = a->mbs; 757 758 PetscFunctionBegin; 759 if (!a->diag) { 760 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 761 } 762 for (i=0; i<m; i++) { 763 a->diag[i] = a->i[i+1]; 764 for (j=a->i[i]; j<a->i[i+1]; j++) { 765 if (a->j[j] == i) { 766 a->diag[i] = j; 767 break; 768 } 769 } 770 } 771 PetscFunctionReturn(0); 772 } 773 774 775 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 776 777 #undef __FUNCT__ 778 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 779 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 780 { 781 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 782 PetscErrorCode ierr; 783 PetscInt n = a->mbs,i; 784 785 PetscFunctionBegin; 786 *nn = n; 787 if (!ia) PetscFunctionReturn(0); 788 if (symmetric) { 789 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 790 } else if (oshift == 1) { 791 /* temporarily add 1 to i and j indices */ 792 PetscInt nz = a->i[n]; 793 for (i=0; i<nz; i++) a->j[i]++; 794 for (i=0; i<n+1; i++) a->i[i]++; 795 *ia = a->i; *ja = a->j; 796 } else { 797 *ia = a->i; *ja = a->j; 798 } 799 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 805 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 806 { 807 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 808 PetscErrorCode ierr; 809 PetscInt i,n = a->mbs; 810 811 PetscFunctionBegin; 812 if (!ia) PetscFunctionReturn(0); 813 if (symmetric) { 814 ierr = PetscFree(*ia);CHKERRQ(ierr); 815 ierr = PetscFree(*ja);CHKERRQ(ierr); 816 } else if (oshift == 1) { 817 PetscInt nz = a->i[n]-1; 818 for (i=0; i<nz; i++) a->j[i]--; 819 for (i=0; i<n+1; i++) a->i[i]--; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatDestroy_SeqBAIJ" 826 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 829 PetscErrorCode ierr; 830 831 PetscFunctionBegin; 832 #if defined(PETSC_USE_LOG) 833 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 834 #endif 835 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 836 if (a->row) { 837 ierr = ISDestroy(a->row);CHKERRQ(ierr); 838 } 839 if (a->col) { 840 ierr = ISDestroy(a->col);CHKERRQ(ierr); 841 } 842 ierr = PetscFree(a->diag);CHKERRQ(ierr); 843 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 844 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 845 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 846 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 847 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 848 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 849 #if defined(PETSC_USE_MAT_SINGLE) 850 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 851 #endif 852 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 853 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 854 855 ierr = PetscFree(a);CHKERRQ(ierr); 856 857 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatSetOption_SeqBAIJ" 870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 871 { 872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 switch (op) { 877 case MAT_ROW_ORIENTED: 878 a->roworiented = PETSC_TRUE; 879 break; 880 case MAT_COLUMN_ORIENTED: 881 a->roworiented = PETSC_FALSE; 882 break; 883 case MAT_COLUMNS_SORTED: 884 a->sorted = PETSC_TRUE; 885 break; 886 case MAT_COLUMNS_UNSORTED: 887 a->sorted = PETSC_FALSE; 888 break; 889 case MAT_KEEP_ZEROED_ROWS: 890 a->keepzeroedrows = PETSC_TRUE; 891 break; 892 case MAT_NO_NEW_NONZERO_LOCATIONS: 893 a->nonew = 1; 894 break; 895 case MAT_NEW_NONZERO_LOCATION_ERR: 896 a->nonew = -1; 897 break; 898 case MAT_NEW_NONZERO_ALLOCATION_ERR: 899 a->nonew = -2; 900 break; 901 case MAT_YES_NEW_NONZERO_LOCATIONS: 902 a->nonew = 0; 903 break; 904 case MAT_ROWS_SORTED: 905 case MAT_ROWS_UNSORTED: 906 case MAT_YES_NEW_DIAGONALS: 907 case MAT_IGNORE_OFF_PROC_ENTRIES: 908 case MAT_USE_HASH_TABLE: 909 ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr); 910 break; 911 case MAT_NO_NEW_DIAGONALS: 912 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 913 case MAT_SYMMETRIC: 914 case MAT_STRUCTURALLY_SYMMETRIC: 915 case MAT_NOT_SYMMETRIC: 916 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 917 case MAT_HERMITIAN: 918 case MAT_NOT_HERMITIAN: 919 case MAT_SYMMETRY_ETERNAL: 920 case MAT_NOT_SYMMETRY_ETERNAL: 921 break; 922 default: 923 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 924 } 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatGetRow_SeqBAIJ" 930 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 931 { 932 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 933 PetscErrorCode ierr; 934 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 935 MatScalar *aa,*aa_i; 936 PetscScalar *v_i; 937 938 PetscFunctionBegin; 939 bs = A->rmap.bs; 940 ai = a->i; 941 aj = a->j; 942 aa = a->a; 943 bs2 = a->bs2; 944 945 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 946 947 bn = row/bs; /* Block number */ 948 bp = row % bs; /* Block Position */ 949 M = ai[bn+1] - ai[bn]; 950 *nz = bs*M; 951 952 if (v) { 953 *v = 0; 954 if (*nz) { 955 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 956 for (i=0; i<M; i++) { /* for each block in the block row */ 957 v_i = *v + i*bs; 958 aa_i = aa + bs2*(ai[bn] + i); 959 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 960 } 961 } 962 } 963 964 if (idx) { 965 *idx = 0; 966 if (*nz) { 967 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 968 for (i=0; i<M; i++) { /* for each block in the block row */ 969 idx_i = *idx + i*bs; 970 itmp = bs*aj[ai[bn] + i]; 971 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 972 } 973 } 974 } 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 980 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 981 { 982 PetscErrorCode ierr; 983 984 PetscFunctionBegin; 985 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 986 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatTranspose_SeqBAIJ" 992 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 993 { 994 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 995 Mat C; 996 PetscErrorCode ierr; 997 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 998 PetscInt *rows,*cols,bs2=a->bs2; 999 PetscScalar *array; 1000 1001 PetscFunctionBegin; 1002 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1003 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1004 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1005 1006 #if defined(PETSC_USE_MAT_SINGLE) 1007 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1008 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1009 #else 1010 array = a->a; 1011 #endif 1012 1013 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1014 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1015 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1016 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1017 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1018 ierr = PetscFree(col);CHKERRQ(ierr); 1019 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1020 cols = rows + bs; 1021 for (i=0; i<mbs; i++) { 1022 cols[0] = i*bs; 1023 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1024 len = ai[i+1] - ai[i]; 1025 for (j=0; j<len; j++) { 1026 rows[0] = (*aj++)*bs; 1027 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1028 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1029 array += bs2; 1030 } 1031 } 1032 ierr = PetscFree(rows);CHKERRQ(ierr); 1033 #if defined(PETSC_USE_MAT_SINGLE) 1034 ierr = PetscFree(array); 1035 #endif 1036 1037 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 1040 if (B) { 1041 *B = C; 1042 } else { 1043 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1044 } 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1050 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1051 { 1052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1053 PetscErrorCode ierr; 1054 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1055 int fd; 1056 PetscScalar *aa; 1057 FILE *file; 1058 1059 PetscFunctionBegin; 1060 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1061 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1062 col_lens[0] = MAT_FILE_COOKIE; 1063 1064 col_lens[1] = A->rmap.N; 1065 col_lens[2] = A->cmap.n; 1066 col_lens[3] = a->nz*bs2; 1067 1068 /* store lengths of each row and write (including header) to file */ 1069 count = 0; 1070 for (i=0; i<a->mbs; i++) { 1071 for (j=0; j<bs; j++) { 1072 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1073 } 1074 } 1075 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1076 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1077 1078 /* store column indices (zero start index) */ 1079 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1080 count = 0; 1081 for (i=0; i<a->mbs; i++) { 1082 for (j=0; j<bs; j++) { 1083 for (k=a->i[i]; k<a->i[i+1]; k++) { 1084 for (l=0; l<bs; l++) { 1085 jj[count++] = bs*a->j[k] + l; 1086 } 1087 } 1088 } 1089 } 1090 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1091 ierr = PetscFree(jj);CHKERRQ(ierr); 1092 1093 /* store nonzero values */ 1094 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1095 count = 0; 1096 for (i=0; i<a->mbs; i++) { 1097 for (j=0; j<bs; j++) { 1098 for (k=a->i[i]; k<a->i[i+1]; k++) { 1099 for (l=0; l<bs; l++) { 1100 aa[count++] = a->a[bs2*k + l*bs + j]; 1101 } 1102 } 1103 } 1104 } 1105 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1106 ierr = PetscFree(aa);CHKERRQ(ierr); 1107 1108 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1109 if (file) { 1110 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1117 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1118 { 1119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1120 PetscErrorCode ierr; 1121 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1122 PetscViewerFormat format; 1123 1124 PetscFunctionBegin; 1125 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1126 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1127 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1128 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1129 Mat aij; 1130 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1131 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1132 ierr = MatDestroy(aij);CHKERRQ(ierr); 1133 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1134 PetscFunctionReturn(0); 1135 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1136 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1137 for (i=0; i<a->mbs; i++) { 1138 for (j=0; j<bs; j++) { 1139 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1140 for (k=a->i[i]; k<a->i[i+1]; k++) { 1141 for (l=0; l<bs; l++) { 1142 #if defined(PETSC_USE_COMPLEX) 1143 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1144 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1145 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1146 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1147 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1148 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1150 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1151 } 1152 #else 1153 if (a->a[bs2*k + l*bs + j] != 0.0) { 1154 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1155 } 1156 #endif 1157 } 1158 } 1159 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1160 } 1161 } 1162 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1163 } else { 1164 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1165 for (i=0; i<a->mbs; i++) { 1166 for (j=0; j<bs; j++) { 1167 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1168 for (k=a->i[i]; k<a->i[i+1]; k++) { 1169 for (l=0; l<bs; l++) { 1170 #if defined(PETSC_USE_COMPLEX) 1171 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1172 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1173 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1174 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1176 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1177 } else { 1178 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1179 } 1180 #else 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1182 #endif 1183 } 1184 } 1185 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1186 } 1187 } 1188 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1189 } 1190 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1196 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1197 { 1198 Mat A = (Mat) Aa; 1199 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1202 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1203 MatScalar *aa; 1204 PetscViewer viewer; 1205 1206 PetscFunctionBegin; 1207 1208 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1209 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1210 1211 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1212 1213 /* loop over matrix elements drawing boxes */ 1214 color = PETSC_DRAW_BLUE; 1215 for (i=0,row=0; i<mbs; i++,row+=bs) { 1216 for (j=a->i[i]; j<a->i[i+1]; j++) { 1217 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1218 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1219 aa = a->a + j*bs2; 1220 for (k=0; k<bs; k++) { 1221 for (l=0; l<bs; l++) { 1222 if (PetscRealPart(*aa++) >= 0.) continue; 1223 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1224 } 1225 } 1226 } 1227 } 1228 color = PETSC_DRAW_CYAN; 1229 for (i=0,row=0; i<mbs; i++,row+=bs) { 1230 for (j=a->i[i]; j<a->i[i+1]; j++) { 1231 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1232 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1233 aa = a->a + j*bs2; 1234 for (k=0; k<bs; k++) { 1235 for (l=0; l<bs; l++) { 1236 if (PetscRealPart(*aa++) != 0.) continue; 1237 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1238 } 1239 } 1240 } 1241 } 1242 1243 color = PETSC_DRAW_RED; 1244 for (i=0,row=0; i<mbs; i++,row+=bs) { 1245 for (j=a->i[i]; j<a->i[i+1]; j++) { 1246 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1247 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1248 aa = a->a + j*bs2; 1249 for (k=0; k<bs; k++) { 1250 for (l=0; l<bs; l++) { 1251 if (PetscRealPart(*aa++) <= 0.) continue; 1252 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1253 } 1254 } 1255 } 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1262 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1263 { 1264 PetscErrorCode ierr; 1265 PetscReal xl,yl,xr,yr,w,h; 1266 PetscDraw draw; 1267 PetscTruth isnull; 1268 1269 PetscFunctionBegin; 1270 1271 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1272 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1273 1274 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1275 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1276 xr += w; yr += h; xl = -w; yl = -h; 1277 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1278 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1279 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_SeqBAIJ" 1285 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1286 { 1287 PetscErrorCode ierr; 1288 PetscTruth iascii,isbinary,isdraw; 1289 1290 PetscFunctionBegin; 1291 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1292 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1294 if (iascii){ 1295 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1296 } else if (isbinary) { 1297 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1298 } else if (isdraw) { 1299 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1300 } else { 1301 Mat B; 1302 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1303 ierr = MatView(B,viewer);CHKERRQ(ierr); 1304 ierr = MatDestroy(B);CHKERRQ(ierr); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1312 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1316 PetscInt *ai = a->i,*ailen = a->ilen; 1317 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1318 MatScalar *ap,*aa = a->a,zero = 0.0; 1319 1320 PetscFunctionBegin; 1321 for (k=0; k<m; k++) { /* loop over rows */ 1322 row = im[k]; brow = row/bs; 1323 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1324 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1325 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1326 nrow = ailen[brow]; 1327 for (l=0; l<n; l++) { /* loop over columns */ 1328 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1329 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1330 col = in[l] ; 1331 bcol = col/bs; 1332 cidx = col%bs; 1333 ridx = row%bs; 1334 high = nrow; 1335 low = 0; /* assume unsorted */ 1336 while (high-low > 5) { 1337 t = (low+high)/2; 1338 if (rp[t] > bcol) high = t; 1339 else low = t; 1340 } 1341 for (i=low; i<high; i++) { 1342 if (rp[i] > bcol) break; 1343 if (rp[i] == bcol) { 1344 *v++ = ap[bs2*i+bs*cidx+ridx]; 1345 goto finished; 1346 } 1347 } 1348 *v++ = zero; 1349 finished:; 1350 } 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #if defined(PETSC_USE_MAT_SINGLE) 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1358 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1359 { 1360 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1361 PetscErrorCode ierr; 1362 PetscInt i,N = m*n*b->bs2; 1363 MatScalar *vsingle; 1364 1365 PetscFunctionBegin; 1366 if (N > b->setvalueslen) { 1367 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1368 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1369 b->setvalueslen = N; 1370 } 1371 vsingle = b->setvaluescopy; 1372 for (i=0; i<N; i++) { 1373 vsingle[i] = v[i]; 1374 } 1375 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 #endif 1379 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1383 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1384 { 1385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1386 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1387 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1388 PetscErrorCode ierr; 1389 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1390 PetscTruth roworiented=a->roworiented; 1391 const MatScalar *value = v; 1392 MatScalar *ap,*aa = a->a,*bap; 1393 1394 PetscFunctionBegin; 1395 if (roworiented) { 1396 stepval = (n-1)*bs; 1397 } else { 1398 stepval = (m-1)*bs; 1399 } 1400 for (k=0; k<m; k++) { /* loop over added rows */ 1401 row = im[k]; 1402 if (row < 0) continue; 1403 #if defined(PETSC_USE_DEBUG) 1404 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1405 #endif 1406 rp = aj + ai[row]; 1407 ap = aa + bs2*ai[row]; 1408 rmax = imax[row]; 1409 nrow = ailen[row]; 1410 low = 0; 1411 high = nrow; 1412 for (l=0; l<n; l++) { /* loop over added columns */ 1413 if (in[l] < 0) continue; 1414 #if defined(PETSC_USE_DEBUG) 1415 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1416 #endif 1417 col = in[l]; 1418 if (roworiented) { 1419 value = v + k*(stepval+bs)*bs + l*bs; 1420 } else { 1421 value = v + l*(stepval+bs)*bs + k*bs; 1422 } 1423 if (col <= lastcol) low = 0; else high = nrow; 1424 lastcol = col; 1425 while (high-low > 7) { 1426 t = (low+high)/2; 1427 if (rp[t] > col) high = t; 1428 else low = t; 1429 } 1430 for (i=low; i<high; i++) { 1431 if (rp[i] > col) break; 1432 if (rp[i] == col) { 1433 bap = ap + bs2*i; 1434 if (roworiented) { 1435 if (is == ADD_VALUES) { 1436 for (ii=0; ii<bs; ii++,value+=stepval) { 1437 for (jj=ii; jj<bs2; jj+=bs) { 1438 bap[jj] += *value++; 1439 } 1440 } 1441 } else { 1442 for (ii=0; ii<bs; ii++,value+=stepval) { 1443 for (jj=ii; jj<bs2; jj+=bs) { 1444 bap[jj] = *value++; 1445 } 1446 } 1447 } 1448 } else { 1449 if (is == ADD_VALUES) { 1450 for (ii=0; ii<bs; ii++,value+=stepval) { 1451 for (jj=0; jj<bs; jj++) { 1452 *bap++ += *value++; 1453 } 1454 } 1455 } else { 1456 for (ii=0; ii<bs; ii++,value+=stepval) { 1457 for (jj=0; jj<bs; jj++) { 1458 *bap++ = *value++; 1459 } 1460 } 1461 } 1462 } 1463 goto noinsert2; 1464 } 1465 } 1466 if (nonew == 1) goto noinsert2; 1467 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1468 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew); 1469 N = nrow++ - 1; high++; 1470 /* shift up all the later entries in this row */ 1471 for (ii=N; ii>=i; ii--) { 1472 rp[ii+1] = rp[ii]; 1473 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1474 } 1475 if (N >= i) { 1476 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1477 } 1478 rp[i] = col; 1479 bap = ap + bs2*i; 1480 if (roworiented) { 1481 for (ii=0; ii<bs; ii++,value+=stepval) { 1482 for (jj=ii; jj<bs2; jj+=bs) { 1483 bap[jj] = *value++; 1484 } 1485 } 1486 } else { 1487 for (ii=0; ii<bs; ii++,value+=stepval) { 1488 for (jj=0; jj<bs; jj++) { 1489 *bap++ = *value++; 1490 } 1491 } 1492 } 1493 noinsert2:; 1494 low = i; 1495 } 1496 ailen[row] = nrow; 1497 } 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1503 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1504 { 1505 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1506 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1507 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1508 PetscErrorCode ierr; 1509 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1510 MatScalar *aa = a->a,*ap; 1511 PetscReal ratio=0.6; 1512 1513 PetscFunctionBegin; 1514 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1515 1516 if (m) rmax = ailen[0]; 1517 for (i=1; i<mbs; i++) { 1518 /* move each row back by the amount of empty slots (fshift) before it*/ 1519 fshift += imax[i-1] - ailen[i-1]; 1520 rmax = PetscMax(rmax,ailen[i]); 1521 if (fshift) { 1522 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1523 N = ailen[i]; 1524 for (j=0; j<N; j++) { 1525 ip[j-fshift] = ip[j]; 1526 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1527 } 1528 } 1529 ai[i] = ai[i-1] + ailen[i-1]; 1530 } 1531 if (mbs) { 1532 fshift += imax[mbs-1] - ailen[mbs-1]; 1533 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1534 } 1535 /* reset ilen and imax for each row */ 1536 for (i=0; i<mbs; i++) { 1537 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1538 } 1539 a->nz = ai[mbs]; 1540 1541 /* diagonals may have moved, so kill the diagonal pointers */ 1542 a->idiagvalid = PETSC_FALSE; 1543 if (fshift && a->diag) { 1544 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1545 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1546 a->diag = 0; 1547 } 1548 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1549 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1550 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1551 a->reallocs = 0; 1552 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1553 1554 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1555 if (a->compressedrow.use){ 1556 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1557 } 1558 1559 A->same_nonzero = PETSC_TRUE; 1560 PetscFunctionReturn(0); 1561 } 1562 1563 /* 1564 This function returns an array of flags which indicate the locations of contiguous 1565 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1566 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1567 Assume: sizes should be long enough to hold all the values. 1568 */ 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1571 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1572 { 1573 PetscInt i,j,k,row; 1574 PetscTruth flg; 1575 1576 PetscFunctionBegin; 1577 for (i=0,j=0; i<n; j++) { 1578 row = idx[i]; 1579 if (row%bs!=0) { /* Not the begining of a block */ 1580 sizes[j] = 1; 1581 i++; 1582 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1583 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1584 i++; 1585 } else { /* Begining of the block, so check if the complete block exists */ 1586 flg = PETSC_TRUE; 1587 for (k=1; k<bs; k++) { 1588 if (row+k != idx[i+k]) { /* break in the block */ 1589 flg = PETSC_FALSE; 1590 break; 1591 } 1592 } 1593 if (flg) { /* No break in the bs */ 1594 sizes[j] = bs; 1595 i+= bs; 1596 } else { 1597 sizes[j] = 1; 1598 i++; 1599 } 1600 } 1601 } 1602 *bs_max = j; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1608 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1609 { 1610 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1611 PetscErrorCode ierr; 1612 PetscInt i,j,k,count,*rows; 1613 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1614 PetscScalar zero = 0.0; 1615 MatScalar *aa; 1616 1617 PetscFunctionBegin; 1618 /* Make a copy of the IS and sort it */ 1619 /* allocate memory for rows,sizes */ 1620 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1621 sizes = rows + is_n; 1622 1623 /* copy IS values to rows, and sort them */ 1624 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1625 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1626 if (baij->keepzeroedrows) { 1627 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1628 bs_max = is_n; 1629 A->same_nonzero = PETSC_TRUE; 1630 } else { 1631 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1632 A->same_nonzero = PETSC_FALSE; 1633 } 1634 1635 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1636 row = rows[j]; 1637 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1638 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1639 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1640 if (sizes[i] == bs && !baij->keepzeroedrows) { 1641 if (diag != 0.0) { 1642 if (baij->ilen[row/bs] > 0) { 1643 baij->ilen[row/bs] = 1; 1644 baij->j[baij->i[row/bs]] = row/bs; 1645 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1646 } 1647 /* Now insert all the diagonal values for this bs */ 1648 for (k=0; k<bs; k++) { 1649 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1650 } 1651 } else { /* (diag == 0.0) */ 1652 baij->ilen[row/bs] = 0; 1653 } /* end (diag == 0.0) */ 1654 } else { /* (sizes[i] != bs) */ 1655 #if defined (PETSC_USE_DEBUG) 1656 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1657 #endif 1658 for (k=0; k<count; k++) { 1659 aa[0] = zero; 1660 aa += bs; 1661 } 1662 if (diag != 0.0) { 1663 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1664 } 1665 } 1666 } 1667 1668 ierr = PetscFree(rows);CHKERRQ(ierr); 1669 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1675 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1676 { 1677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1678 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1679 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1680 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1681 PetscErrorCode ierr; 1682 PetscInt ridx,cidx,bs2=a->bs2; 1683 PetscTruth roworiented=a->roworiented; 1684 MatScalar *ap,value,*aa=a->a,*bap; 1685 1686 PetscFunctionBegin; 1687 for (k=0; k<m; k++) { /* loop over added rows */ 1688 row = im[k]; 1689 brow = row/bs; 1690 if (row < 0) continue; 1691 #if defined(PETSC_USE_DEBUG) 1692 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1693 #endif 1694 rp = aj + ai[brow]; 1695 ap = aa + bs2*ai[brow]; 1696 rmax = imax[brow]; 1697 nrow = ailen[brow]; 1698 low = 0; 1699 high = nrow; 1700 for (l=0; l<n; l++) { /* loop over added columns */ 1701 if (in[l] < 0) continue; 1702 #if defined(PETSC_USE_DEBUG) 1703 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1704 #endif 1705 col = in[l]; bcol = col/bs; 1706 ridx = row % bs; cidx = col % bs; 1707 if (roworiented) { 1708 value = v[l + k*n]; 1709 } else { 1710 value = v[k + l*m]; 1711 } 1712 if (col <= lastcol) low = 0; else high = nrow; 1713 lastcol = col; 1714 while (high-low > 7) { 1715 t = (low+high)/2; 1716 if (rp[t] > bcol) high = t; 1717 else low = t; 1718 } 1719 for (i=low; i<high; i++) { 1720 if (rp[i] > bcol) break; 1721 if (rp[i] == bcol) { 1722 bap = ap + bs2*i + bs*cidx + ridx; 1723 if (is == ADD_VALUES) *bap += value; 1724 else *bap = value; 1725 goto noinsert1; 1726 } 1727 } 1728 if (nonew == 1) goto noinsert1; 1729 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1730 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew); 1731 N = nrow++ - 1; high++; 1732 /* shift up all the later entries in this row */ 1733 for (ii=N; ii>=i; ii--) { 1734 rp[ii+1] = rp[ii]; 1735 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1736 } 1737 if (N>=i) { 1738 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1739 } 1740 rp[i] = bcol; 1741 ap[bs2*i + bs*cidx + ridx] = value; 1742 a->nz++; 1743 noinsert1:; 1744 low = i; 1745 } 1746 ailen[brow] = nrow; 1747 } 1748 A->same_nonzero = PETSC_FALSE; 1749 PetscFunctionReturn(0); 1750 } 1751 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1755 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1756 { 1757 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1758 Mat outA; 1759 PetscErrorCode ierr; 1760 PetscTruth row_identity,col_identity; 1761 1762 PetscFunctionBegin; 1763 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1764 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1765 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1766 if (!row_identity || !col_identity) { 1767 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1768 } 1769 1770 outA = inA; 1771 inA->factor = FACTOR_LU; 1772 1773 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1774 1775 a->row = row; 1776 a->col = col; 1777 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1778 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1779 1780 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1781 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1782 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1783 1784 /* 1785 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1786 for ILU(0) factorization with natural ordering 1787 */ 1788 if (inA->rmap.bs < 8) { 1789 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1790 } else { 1791 if (!a->solve_work) { 1792 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1793 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1794 } 1795 } 1796 1797 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1798 1799 PetscFunctionReturn(0); 1800 } 1801 1802 EXTERN_C_BEGIN 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1805 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1806 { 1807 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1808 PetscInt i,nz,nbs; 1809 1810 PetscFunctionBegin; 1811 nz = baij->maxnz/baij->bs2; 1812 nbs = baij->nbs; 1813 for (i=0; i<nz; i++) { 1814 baij->j[i] = indices[i]; 1815 } 1816 baij->nz = nz; 1817 for (i=0; i<nbs; i++) { 1818 baij->ilen[i] = baij->imax[i]; 1819 } 1820 1821 PetscFunctionReturn(0); 1822 } 1823 EXTERN_C_END 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1827 /*@ 1828 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1829 in the matrix. 1830 1831 Input Parameters: 1832 + mat - the SeqBAIJ matrix 1833 - indices - the column indices 1834 1835 Level: advanced 1836 1837 Notes: 1838 This can be called if you have precomputed the nonzero structure of the 1839 matrix and want to provide it to the matrix object to improve the performance 1840 of the MatSetValues() operation. 1841 1842 You MUST have set the correct numbers of nonzeros per row in the call to 1843 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1844 1845 MUST be called before any calls to MatSetValues(); 1846 1847 @*/ 1848 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1849 { 1850 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1851 1852 PetscFunctionBegin; 1853 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1854 PetscValidPointer(indices,2); 1855 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1856 if (f) { 1857 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1858 } else { 1859 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1866 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1867 { 1868 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1869 PetscErrorCode ierr; 1870 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1871 PetscReal atmp; 1872 PetscScalar *x,zero = 0.0; 1873 MatScalar *aa; 1874 PetscInt ncols,brow,krow,kcol; 1875 1876 PetscFunctionBegin; 1877 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1878 bs = A->rmap.bs; 1879 aa = a->a; 1880 ai = a->i; 1881 aj = a->j; 1882 mbs = a->mbs; 1883 1884 ierr = VecSet(v,zero);CHKERRQ(ierr); 1885 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1886 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1887 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1888 for (i=0; i<mbs; i++) { 1889 ncols = ai[1] - ai[0]; ai++; 1890 brow = bs*i; 1891 for (j=0; j<ncols; j++){ 1892 /* bcol = bs*(*aj); */ 1893 for (kcol=0; kcol<bs; kcol++){ 1894 for (krow=0; krow<bs; krow++){ 1895 atmp = PetscAbsScalar(*aa); aa++; 1896 row = brow + krow; /* row index */ 1897 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1898 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1899 } 1900 } 1901 aj++; 1902 } 1903 } 1904 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatCopy_SeqBAIJ" 1910 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1911 { 1912 PetscErrorCode ierr; 1913 1914 PetscFunctionBegin; 1915 /* If the two matrices have the same copy implementation, use fast copy. */ 1916 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1917 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1918 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1919 1920 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1921 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1922 } 1923 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1924 } else { 1925 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1926 } 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1932 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1933 { 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1943 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1944 { 1945 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1946 PetscFunctionBegin; 1947 *array = a->a; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1953 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1954 { 1955 PetscFunctionBegin; 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #include "petscblaslapack.h" 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1962 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1963 { 1964 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1965 PetscErrorCode ierr; 1966 PetscInt i,bs=Y->rmap.bs,j,bs2; 1967 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1968 1969 PetscFunctionBegin; 1970 if (str == SAME_NONZERO_PATTERN) { 1971 PetscScalar alpha = a; 1972 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1973 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1974 if (y->xtoy && y->XtoY != X) { 1975 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1976 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1977 } 1978 if (!y->xtoy) { /* get xtoy */ 1979 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1980 y->XtoY = X; 1981 } 1982 bs2 = bs*bs; 1983 for (i=0; i<x->nz; i++) { 1984 j = 0; 1985 while (j < bs2){ 1986 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1987 j++; 1988 } 1989 } 1990 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1991 } else { 1992 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRealPart_SeqBAIJ" 1999 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2000 { 2001 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2002 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2003 PetscScalar *aa = a->a; 2004 2005 PetscFunctionBegin; 2006 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2012 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2013 { 2014 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2015 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2016 PetscScalar *aa = a->a; 2017 2018 PetscFunctionBegin; 2019 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 2024 /* -------------------------------------------------------------------*/ 2025 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2026 MatGetRow_SeqBAIJ, 2027 MatRestoreRow_SeqBAIJ, 2028 MatMult_SeqBAIJ_N, 2029 /* 4*/ MatMultAdd_SeqBAIJ_N, 2030 MatMultTranspose_SeqBAIJ, 2031 MatMultTransposeAdd_SeqBAIJ, 2032 MatSolve_SeqBAIJ_N, 2033 0, 2034 0, 2035 /*10*/ 0, 2036 MatLUFactor_SeqBAIJ, 2037 0, 2038 0, 2039 MatTranspose_SeqBAIJ, 2040 /*15*/ MatGetInfo_SeqBAIJ, 2041 MatEqual_SeqBAIJ, 2042 MatGetDiagonal_SeqBAIJ, 2043 MatDiagonalScale_SeqBAIJ, 2044 MatNorm_SeqBAIJ, 2045 /*20*/ 0, 2046 MatAssemblyEnd_SeqBAIJ, 2047 0, 2048 MatSetOption_SeqBAIJ, 2049 MatZeroEntries_SeqBAIJ, 2050 /*25*/ MatZeroRows_SeqBAIJ, 2051 MatLUFactorSymbolic_SeqBAIJ, 2052 MatLUFactorNumeric_SeqBAIJ_N, 2053 MatCholeskyFactorSymbolic_SeqBAIJ, 2054 MatCholeskyFactorNumeric_SeqBAIJ_N, 2055 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2056 MatILUFactorSymbolic_SeqBAIJ, 2057 MatICCFactorSymbolic_SeqBAIJ, 2058 MatGetArray_SeqBAIJ, 2059 MatRestoreArray_SeqBAIJ, 2060 /*35*/ MatDuplicate_SeqBAIJ, 2061 0, 2062 0, 2063 MatILUFactor_SeqBAIJ, 2064 0, 2065 /*40*/ MatAXPY_SeqBAIJ, 2066 MatGetSubMatrices_SeqBAIJ, 2067 MatIncreaseOverlap_SeqBAIJ, 2068 MatGetValues_SeqBAIJ, 2069 MatCopy_SeqBAIJ, 2070 /*45*/ 0, 2071 MatScale_SeqBAIJ, 2072 0, 2073 0, 2074 0, 2075 /*50*/ 0, 2076 MatGetRowIJ_SeqBAIJ, 2077 MatRestoreRowIJ_SeqBAIJ, 2078 0, 2079 0, 2080 /*55*/ 0, 2081 0, 2082 0, 2083 0, 2084 MatSetValuesBlocked_SeqBAIJ, 2085 /*60*/ MatGetSubMatrix_SeqBAIJ, 2086 MatDestroy_SeqBAIJ, 2087 MatView_SeqBAIJ, 2088 0, 2089 0, 2090 /*65*/ 0, 2091 0, 2092 0, 2093 0, 2094 0, 2095 /*70*/ MatGetRowMax_SeqBAIJ, 2096 MatConvert_Basic, 2097 0, 2098 0, 2099 0, 2100 /*75*/ 0, 2101 0, 2102 0, 2103 0, 2104 0, 2105 /*80*/ 0, 2106 0, 2107 0, 2108 0, 2109 MatLoad_SeqBAIJ, 2110 /*85*/ 0, 2111 0, 2112 0, 2113 0, 2114 0, 2115 /*90*/ 0, 2116 0, 2117 0, 2118 0, 2119 0, 2120 /*95*/ 0, 2121 0, 2122 0, 2123 0, 2124 0, 2125 /*100*/0, 2126 0, 2127 0, 2128 0, 2129 0, 2130 /*105*/0, 2131 MatRealPart_SeqBAIJ, 2132 MatImaginaryPart_SeqBAIJ 2133 }; 2134 2135 EXTERN_C_BEGIN 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2138 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2139 { 2140 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2141 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 if (aij->nonew != 1) { 2146 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2147 } 2148 2149 /* allocate space for values if not already there */ 2150 if (!aij->saved_values) { 2151 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2152 } 2153 2154 /* copy values over */ 2155 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 EXTERN_C_END 2159 2160 EXTERN_C_BEGIN 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2163 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2164 { 2165 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2166 PetscErrorCode ierr; 2167 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2168 2169 PetscFunctionBegin; 2170 if (aij->nonew != 1) { 2171 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2172 } 2173 if (!aij->saved_values) { 2174 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2175 } 2176 2177 /* copy values over */ 2178 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 EXTERN_C_END 2182 2183 EXTERN_C_BEGIN 2184 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2185 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2186 EXTERN_C_END 2187 2188 EXTERN_C_BEGIN 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2191 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2192 { 2193 Mat_SeqBAIJ *b; 2194 PetscErrorCode ierr; 2195 PetscInt i,mbs,nbs,bs2,newbs = bs; 2196 PetscTruth flg,skipallocation = PETSC_FALSE; 2197 2198 PetscFunctionBegin; 2199 2200 if (nz == MAT_SKIP_ALLOCATION) { 2201 skipallocation = PETSC_TRUE; 2202 nz = 0; 2203 } 2204 2205 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2206 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2207 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2208 2209 if (nnz && newbs != bs) { 2210 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2211 } 2212 bs = newbs; 2213 2214 B->rmap.bs = B->cmap.bs = bs; 2215 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2216 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2217 2218 B->preallocated = PETSC_TRUE; 2219 2220 mbs = B->rmap.n/bs; 2221 nbs = B->cmap.n/bs; 2222 bs2 = bs*bs; 2223 2224 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2225 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2226 } 2227 2228 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2229 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2230 if (nnz) { 2231 for (i=0; i<mbs; i++) { 2232 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2233 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2234 } 2235 } 2236 2237 b = (Mat_SeqBAIJ*)B->data; 2238 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2239 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2240 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2241 2242 B->ops->solve = MatSolve_SeqBAIJ_Update; 2243 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2244 if (!flg) { 2245 switch (bs) { 2246 case 1: 2247 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2248 B->ops->mult = MatMult_SeqBAIJ_1; 2249 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2250 break; 2251 case 2: 2252 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2253 B->ops->mult = MatMult_SeqBAIJ_2; 2254 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2255 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2256 break; 2257 case 3: 2258 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2259 B->ops->mult = MatMult_SeqBAIJ_3; 2260 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2261 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2262 break; 2263 case 4: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2265 B->ops->mult = MatMult_SeqBAIJ_4; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2267 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2268 break; 2269 case 5: 2270 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2271 B->ops->mult = MatMult_SeqBAIJ_5; 2272 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2273 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2274 break; 2275 case 6: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2277 B->ops->mult = MatMult_SeqBAIJ_6; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2279 break; 2280 case 7: 2281 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2282 B->ops->mult = MatMult_SeqBAIJ_7; 2283 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2284 break; 2285 default: 2286 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2287 B->ops->mult = MatMult_SeqBAIJ_N; 2288 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2289 break; 2290 } 2291 } 2292 B->rmap.bs = bs; 2293 b->mbs = mbs; 2294 b->nbs = nbs; 2295 if (!skipallocation) { 2296 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2297 /* b->ilen will count nonzeros in each block row so far. */ 2298 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2299 if (!nnz) { 2300 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2301 else if (nz <= 0) nz = 1; 2302 for (i=0; i<mbs; i++) b->imax[i] = nz; 2303 nz = nz*mbs; 2304 } else { 2305 nz = 0; 2306 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2307 } 2308 2309 /* allocate the matrix space */ 2310 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2311 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2312 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2313 b->singlemalloc = PETSC_TRUE; 2314 2315 b->i[0] = 0; 2316 for (i=1; i<mbs+1; i++) { 2317 b->i[i] = b->i[i-1] + b->imax[i-1]; 2318 } 2319 b->free_a = PETSC_TRUE; 2320 b->free_ij = PETSC_TRUE; 2321 } else { 2322 b->free_a = PETSC_FALSE; 2323 b->free_ij = PETSC_FALSE; 2324 } 2325 2326 B->rmap.bs = bs; 2327 b->bs2 = bs2; 2328 b->mbs = mbs; 2329 b->nz = 0; 2330 b->maxnz = nz*bs2; 2331 B->info.nz_unneeded = (PetscReal)b->maxnz; 2332 PetscFunctionReturn(0); 2333 } 2334 EXTERN_C_END 2335 2336 /*MC 2337 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2338 block sparse compressed row format. 2339 2340 Options Database Keys: 2341 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2342 2343 Level: beginner 2344 2345 .seealso: MatCreateSeqBAIJ() 2346 M*/ 2347 2348 EXTERN_C_BEGIN 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatCreate_SeqBAIJ" 2351 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2352 { 2353 PetscErrorCode ierr; 2354 PetscMPIInt size; 2355 Mat_SeqBAIJ *b; 2356 2357 PetscFunctionBegin; 2358 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2359 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2360 2361 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2362 B->data = (void*)b; 2363 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2364 B->factor = 0; 2365 B->mapping = 0; 2366 b->row = 0; 2367 b->col = 0; 2368 b->icol = 0; 2369 b->reallocs = 0; 2370 b->saved_values = 0; 2371 #if defined(PETSC_USE_MAT_SINGLE) 2372 b->setvalueslen = 0; 2373 b->setvaluescopy = PETSC_NULL; 2374 #endif 2375 2376 b->sorted = PETSC_FALSE; 2377 b->roworiented = PETSC_TRUE; 2378 b->nonew = 0; 2379 b->diag = 0; 2380 b->solve_work = 0; 2381 b->mult_work = 0; 2382 B->spptr = 0; 2383 B->info.nz_unneeded = (PetscReal)b->maxnz; 2384 b->keepzeroedrows = PETSC_FALSE; 2385 b->xtoy = 0; 2386 b->XtoY = 0; 2387 b->compressedrow.use = PETSC_FALSE; 2388 b->compressedrow.nrows = 0; 2389 b->compressedrow.i = PETSC_NULL; 2390 b->compressedrow.rindex = PETSC_NULL; 2391 b->compressedrow.checked = PETSC_FALSE; 2392 B->same_nonzero = PETSC_FALSE; 2393 2394 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2395 "MatInvertBlockDiagonal_SeqBAIJ", 2396 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2397 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2398 "MatStoreValues_SeqBAIJ", 2399 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2401 "MatRetrieveValues_SeqBAIJ", 2402 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2404 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2405 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2407 "MatConvert_SeqBAIJ_SeqAIJ", 2408 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2410 "MatConvert_SeqBAIJ_SeqSBAIJ", 2411 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2413 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2414 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2415 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 EXTERN_C_END 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2422 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2423 { 2424 Mat C; 2425 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2426 PetscErrorCode ierr; 2427 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2428 2429 PetscFunctionBegin; 2430 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2431 2432 *B = 0; 2433 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2434 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2435 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2436 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2437 c = (Mat_SeqBAIJ*)C->data; 2438 2439 C->rmap.N = A->rmap.N; 2440 C->cmap.N = A->cmap.N; 2441 C->rmap.bs = A->rmap.bs; 2442 c->bs2 = a->bs2; 2443 c->mbs = a->mbs; 2444 c->nbs = a->nbs; 2445 2446 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2447 for (i=0; i<mbs; i++) { 2448 c->imax[i] = a->imax[i]; 2449 c->ilen[i] = a->ilen[i]; 2450 } 2451 2452 /* allocate the matrix space */ 2453 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2454 c->singlemalloc = PETSC_TRUE; 2455 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2456 if (mbs > 0) { 2457 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2458 if (cpvalues == MAT_COPY_VALUES) { 2459 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2460 } else { 2461 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2462 } 2463 } 2464 c->sorted = a->sorted; 2465 c->roworiented = a->roworiented; 2466 c->nonew = a->nonew; 2467 2468 if (a->diag) { 2469 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2470 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2471 for (i=0; i<mbs; i++) { 2472 c->diag[i] = a->diag[i]; 2473 } 2474 } else c->diag = 0; 2475 c->nz = a->nz; 2476 c->maxnz = a->maxnz; 2477 c->solve_work = 0; 2478 c->mult_work = 0; 2479 c->free_a = PETSC_TRUE; 2480 c->free_ij = PETSC_TRUE; 2481 C->preallocated = PETSC_TRUE; 2482 C->assembled = PETSC_TRUE; 2483 2484 c->compressedrow.use = a->compressedrow.use; 2485 c->compressedrow.nrows = a->compressedrow.nrows; 2486 c->compressedrow.checked = a->compressedrow.checked; 2487 if ( a->compressedrow.checked && a->compressedrow.use){ 2488 i = a->compressedrow.nrows; 2489 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2490 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2491 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2492 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2493 } else { 2494 c->compressedrow.use = PETSC_FALSE; 2495 c->compressedrow.i = PETSC_NULL; 2496 c->compressedrow.rindex = PETSC_NULL; 2497 } 2498 C->same_nonzero = A->same_nonzero; 2499 *B = C; 2500 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatLoad_SeqBAIJ" 2506 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2507 { 2508 Mat_SeqBAIJ *a; 2509 Mat B; 2510 PetscErrorCode ierr; 2511 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2512 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2513 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2514 PetscInt *masked,nmask,tmp,bs2,ishift; 2515 PetscMPIInt size; 2516 int fd; 2517 PetscScalar *aa; 2518 MPI_Comm comm = ((PetscObject)viewer)->comm; 2519 2520 PetscFunctionBegin; 2521 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2522 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2523 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2524 bs2 = bs*bs; 2525 2526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2527 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2528 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2529 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2530 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2531 M = header[1]; N = header[2]; nz = header[3]; 2532 2533 if (header[3] < 0) { 2534 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2535 } 2536 2537 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2538 2539 /* 2540 This code adds extra rows to make sure the number of rows is 2541 divisible by the blocksize 2542 */ 2543 mbs = M/bs; 2544 extra_rows = bs - M + bs*(mbs); 2545 if (extra_rows == bs) extra_rows = 0; 2546 else mbs++; 2547 if (extra_rows) { 2548 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2549 } 2550 2551 /* read in row lengths */ 2552 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2553 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2554 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2555 2556 /* read in column indices */ 2557 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2558 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2559 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2560 2561 /* loop over row lengths determining block row lengths */ 2562 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2563 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2564 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2565 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2566 masked = mask + mbs; 2567 rowcount = 0; nzcount = 0; 2568 for (i=0; i<mbs; i++) { 2569 nmask = 0; 2570 for (j=0; j<bs; j++) { 2571 kmax = rowlengths[rowcount]; 2572 for (k=0; k<kmax; k++) { 2573 tmp = jj[nzcount++]/bs; 2574 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2575 } 2576 rowcount++; 2577 } 2578 browlengths[i] += nmask; 2579 /* zero out the mask elements we set */ 2580 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2581 } 2582 2583 /* create our matrix */ 2584 ierr = MatCreate(comm,&B); 2585 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2586 ierr = MatSetType(B,type);CHKERRQ(ierr); 2587 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2588 a = (Mat_SeqBAIJ*)B->data; 2589 2590 /* set matrix "i" values */ 2591 a->i[0] = 0; 2592 for (i=1; i<= mbs; i++) { 2593 a->i[i] = a->i[i-1] + browlengths[i-1]; 2594 a->ilen[i-1] = browlengths[i-1]; 2595 } 2596 a->nz = 0; 2597 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2598 2599 /* read in nonzero values */ 2600 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2601 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2602 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2603 2604 /* set "a" and "j" values into matrix */ 2605 nzcount = 0; jcount = 0; 2606 for (i=0; i<mbs; i++) { 2607 nzcountb = nzcount; 2608 nmask = 0; 2609 for (j=0; j<bs; j++) { 2610 kmax = rowlengths[i*bs+j]; 2611 for (k=0; k<kmax; k++) { 2612 tmp = jj[nzcount++]/bs; 2613 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2614 } 2615 } 2616 /* sort the masked values */ 2617 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2618 2619 /* set "j" values into matrix */ 2620 maskcount = 1; 2621 for (j=0; j<nmask; j++) { 2622 a->j[jcount++] = masked[j]; 2623 mask[masked[j]] = maskcount++; 2624 } 2625 /* set "a" values into matrix */ 2626 ishift = bs2*a->i[i]; 2627 for (j=0; j<bs; j++) { 2628 kmax = rowlengths[i*bs+j]; 2629 for (k=0; k<kmax; k++) { 2630 tmp = jj[nzcountb]/bs ; 2631 block = mask[tmp] - 1; 2632 point = jj[nzcountb] - bs*tmp; 2633 idx = ishift + bs2*block + j + bs*point; 2634 a->a[idx] = (MatScalar)aa[nzcountb++]; 2635 } 2636 } 2637 /* zero out the mask elements we set */ 2638 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2639 } 2640 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2641 2642 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2643 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2644 ierr = PetscFree(aa);CHKERRQ(ierr); 2645 ierr = PetscFree(jj);CHKERRQ(ierr); 2646 ierr = PetscFree(mask);CHKERRQ(ierr); 2647 2648 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatView_Private(B);CHKERRQ(ierr); 2651 2652 *A = B; 2653 PetscFunctionReturn(0); 2654 } 2655 2656 #undef __FUNCT__ 2657 #define __FUNCT__ "MatCreateSeqBAIJ" 2658 /*@C 2659 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2660 compressed row) format. For good matrix assembly performance the 2661 user should preallocate the matrix storage by setting the parameter nz 2662 (or the array nnz). By setting these parameters accurately, performance 2663 during matrix assembly can be increased by more than a factor of 50. 2664 2665 Collective on MPI_Comm 2666 2667 Input Parameters: 2668 + comm - MPI communicator, set to PETSC_COMM_SELF 2669 . bs - size of block 2670 . m - number of rows 2671 . n - number of columns 2672 . nz - number of nonzero blocks per block row (same for all rows) 2673 - nnz - array containing the number of nonzero blocks in the various block rows 2674 (possibly different for each block row) or PETSC_NULL 2675 2676 Output Parameter: 2677 . A - the matrix 2678 2679 Options Database Keys: 2680 . -mat_no_unroll - uses code that does not unroll the loops in the 2681 block calculations (much slower) 2682 . -mat_block_size - size of the blocks to use 2683 2684 Level: intermediate 2685 2686 Notes: 2687 The number of rows and columns must be divisible by blocksize. 2688 2689 If the nnz parameter is given then the nz parameter is ignored 2690 2691 A nonzero block is any block that as 1 or more nonzeros in it 2692 2693 The block AIJ format is fully compatible with standard Fortran 77 2694 storage. That is, the stored row and column indices can begin at 2695 either one (as in Fortran) or zero. See the users' manual for details. 2696 2697 Specify the preallocated storage with either nz or nnz (not both). 2698 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2699 allocation. For additional details, see the users manual chapter on 2700 matrices. 2701 2702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2703 @*/ 2704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2705 { 2706 PetscErrorCode ierr; 2707 2708 PetscFunctionBegin; 2709 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2710 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2711 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2712 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2718 /*@C 2719 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2720 per row in the matrix. For good matrix assembly performance the 2721 user should preallocate the matrix storage by setting the parameter nz 2722 (or the array nnz). By setting these parameters accurately, performance 2723 during matrix assembly can be increased by more than a factor of 50. 2724 2725 Collective on MPI_Comm 2726 2727 Input Parameters: 2728 + A - the matrix 2729 . bs - size of block 2730 . nz - number of block nonzeros per block row (same for all rows) 2731 - nnz - array containing the number of block nonzeros in the various block rows 2732 (possibly different for each block row) or PETSC_NULL 2733 2734 Options Database Keys: 2735 . -mat_no_unroll - uses code that does not unroll the loops in the 2736 block calculations (much slower) 2737 . -mat_block_size - size of the blocks to use 2738 2739 Level: intermediate 2740 2741 Notes: 2742 If the nnz parameter is given then the nz parameter is ignored 2743 2744 The block AIJ format is fully compatible with standard Fortran 77 2745 storage. That is, the stored row and column indices can begin at 2746 either one (as in Fortran) or zero. See the users' manual for details. 2747 2748 Specify the preallocated storage with either nz or nnz (not both). 2749 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2750 allocation. For additional details, see the users manual chapter on 2751 matrices. 2752 2753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2754 @*/ 2755 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2756 { 2757 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2758 2759 PetscFunctionBegin; 2760 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2761 if (f) { 2762 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2763 } 2764 PetscFunctionReturn(0); 2765 } 2766 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2769 /*@ 2770 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2771 (upper triangular entries in CSR format) provided by the user. 2772 2773 Collective on MPI_Comm 2774 2775 Input Parameters: 2776 + comm - must be an MPI communicator of size 1 2777 . bs - size of block 2778 . m - number of rows 2779 . n - number of columns 2780 . i - row indices 2781 . j - column indices 2782 - a - matrix values 2783 2784 Output Parameter: 2785 . mat - the matrix 2786 2787 Level: intermediate 2788 2789 Notes: 2790 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2791 once the matrix is destroyed 2792 2793 You cannot set new nonzero locations into this matrix, that will generate an error. 2794 2795 The i and j indices are 0 based 2796 2797 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2798 2799 @*/ 2800 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2801 { 2802 PetscErrorCode ierr; 2803 PetscInt ii; 2804 Mat_SeqBAIJ *baij; 2805 2806 PetscFunctionBegin; 2807 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2808 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2809 2810 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2811 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2812 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2813 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2814 baij = (Mat_SeqBAIJ*)(*mat)->data; 2815 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2816 2817 baij->i = i; 2818 baij->j = j; 2819 baij->a = a; 2820 baij->singlemalloc = PETSC_FALSE; 2821 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2822 baij->free_a = PETSC_FALSE; 2823 baij->free_ij = PETSC_FALSE; 2824 2825 for (ii=0; ii<m; ii++) { 2826 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2827 #if defined(PETSC_USE_DEBUG) 2828 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2829 #endif 2830 } 2831 #if defined(PETSC_USE_DEBUG) 2832 for (ii=0; ii<baij->i[m]; ii++) { 2833 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2834 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2835 } 2836 #endif 2837 2838 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 PetscFunctionReturn(0); 2841 } 2842