1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 default: 110 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 111 } 112 a->idiagvalid = PETSC_TRUE; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 120 { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 122 PetscScalar *x,x1,s1; 123 const PetscScalar *b; 124 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 125 PetscErrorCode ierr; 126 PetscInt m = a->mbs,i,i2,nz,idx; 127 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 128 129 PetscFunctionBegin; 130 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 131 its = its*lits; 132 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 133 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 134 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 135 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 136 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 137 138 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 139 140 diag = a->diag; 141 idiag = a->idiag; 142 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 143 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 144 145 if (flag & SOR_ZERO_INITIAL_GUESS) { 146 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 147 x[0] = b[0]*idiag[0]; 148 i2 = 1; 149 idiag += 1; 150 for (i=1; i<m; i++) { 151 v = aa + ai[i]; 152 vi = aj + ai[i]; 153 nz = diag[i] - ai[i]; 154 s1 = b[i2]; 155 while (nz--) { 156 idx = (*vi++); 157 x1 = x[idx]; 158 s1 -= v[0]*x1; 159 v += 1; 160 } 161 x[i2] = idiag[0]*s1; 162 idiag += 1; 163 i2 += 1; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; 174 x[i2] = mdiag[0]*x1; 175 mdiag += 1; 176 i2 += 1; 177 } 178 ierr = PetscLogFlops(m);CHKERRQ(ierr); 179 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 180 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 183 idiag = a->idiag+a->mbs - 1; 184 i2 = m - 1; 185 x1 = x[i2]; 186 x[i2] = idiag[0]*x1; 187 idiag -= 1; 188 i2 -= 1; 189 for (i=m-2; i>=0; i--) { 190 v = aa + (diag[i]+1); 191 vi = aj + diag[i] + 1; 192 nz = ai[i+1] - diag[i] - 1; 193 s1 = x[i2]; 194 while (nz--) { 195 idx = (*vi++); 196 x1 = x[idx]; 197 s1 -= v[0]*x1; 198 v += 1; 199 } 200 x[i2] = idiag[0]*s1; 201 idiag -= 2; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else { 207 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 208 } 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 PetscScalar *x,x1,x2,s1,s2; 220 const PetscScalar *b; 221 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 222 PetscErrorCode ierr; 223 PetscInt m = a->mbs,i,i2,nz,idx; 224 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225 226 PetscFunctionBegin; 227 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 228 its = its*lits; 229 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234 235 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 246 i2 = 2; 247 idiag += 4; 248 for (i=1; i<m; i++) { 249 v = aa + 4*ai[i]; 250 vi = aj + ai[i]; 251 nz = diag[i] - ai[i]; 252 s1 = b[i2]; s2 = b[i2+1]; 253 while (nz--) { 254 idx = 2*(*vi++); 255 x1 = x[idx]; x2 = x[1+idx]; 256 s1 -= v[0]*x1 + v[2]*x2; 257 s2 -= v[1]*x1 + v[3]*x2; 258 v += 4; 259 } 260 x[i2] = idiag[0]*s1 + idiag[2]*s2; 261 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 262 idiag += 4; 263 i2 += 2; 264 } 265 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 266 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 267 } 268 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 269 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 270 i2 = 0; 271 mdiag = a->idiag+4*a->mbs; 272 for (i=0; i<m; i++) { 273 x1 = x[i2]; x2 = x[i2+1]; 274 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 275 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 276 mdiag += 4; 277 i2 += 2; 278 } 279 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 280 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 281 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 282 } 283 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 284 idiag = a->idiag+4*a->mbs - 4; 285 i2 = 2*m - 2; 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = idiag[0]*x1 + idiag[2]*x2; 288 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 289 idiag -= 4; 290 i2 -= 2; 291 for (i=m-2; i>=0; i--) { 292 v = aa + 4*(diag[i]+1); 293 vi = aj + diag[i] + 1; 294 nz = ai[i+1] - diag[i] - 1; 295 s1 = x[i2]; s2 = x[i2+1]; 296 while (nz--) { 297 idx = 2*(*vi++); 298 x1 = x[idx]; x2 = x[1+idx]; 299 s1 -= v[0]*x1 + v[2]*x2; 300 s2 -= v[1]*x1 + v[3]*x2; 301 v += 4; 302 } 303 x[i2] = idiag[0]*s1 + idiag[2]*s2; 304 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 305 idiag -= 4; 306 i2 -= 2; 307 } 308 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 312 } 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 420 } 421 } else { 422 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 423 } 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 435 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 436 const PetscScalar *b; 437 PetscErrorCode ierr; 438 PetscInt m = a->mbs,i,i2,nz,idx; 439 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 440 441 PetscFunctionBegin; 442 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 443 its = its*lits; 444 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 445 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 446 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 447 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 448 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 449 450 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 451 452 diag = a->diag; 453 idiag = a->idiag; 454 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 455 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 456 457 if (flag & SOR_ZERO_INITIAL_GUESS) { 458 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 459 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 460 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 461 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 462 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 463 i2 = 4; 464 idiag += 16; 465 for (i=1; i<m; i++) { 466 v = aa + 16*ai[i]; 467 vi = aj + ai[i]; 468 nz = diag[i] - ai[i]; 469 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 473 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 474 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 475 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 476 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 477 v += 16; 478 } 479 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 480 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 481 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 482 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 483 idiag += 16; 484 i2 += 4; 485 } 486 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 487 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 488 } 489 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 490 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 491 i2 = 0; 492 mdiag = a->idiag+16*a->mbs; 493 for (i=0; i<m; i++) { 494 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 495 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 496 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 497 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 498 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 499 mdiag += 16; 500 i2 += 4; 501 } 502 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 503 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 504 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 505 } 506 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 507 idiag = a->idiag+16*a->mbs - 16; 508 i2 = 4*m - 4; 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 510 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 511 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 512 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 513 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 514 idiag -= 16; 515 i2 -= 4; 516 for (i=m-2; i>=0; i--) { 517 v = aa + 16*(diag[i]+1); 518 vi = aj + diag[i] + 1; 519 nz = ai[i+1] - diag[i] - 1; 520 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 521 while (nz--) { 522 idx = 4*(*vi++); 523 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 524 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 525 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 526 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 527 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 528 v += 16; 529 } 530 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 531 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 532 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 533 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 534 idiag -= 16; 535 i2 -= 4; 536 } 537 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 538 } 539 } else { 540 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 541 } 542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 550 { 551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 552 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 553 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 554 const PetscScalar *b; 555 PetscErrorCode ierr; 556 PetscInt m = a->mbs,i,i2,nz,idx; 557 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 558 559 PetscFunctionBegin; 560 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 561 its = its*lits; 562 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 563 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 564 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 565 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 566 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 567 568 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 569 570 diag = a->diag; 571 idiag = a->idiag; 572 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 574 575 if (flag & SOR_ZERO_INITIAL_GUESS) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 577 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 578 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 579 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 580 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 581 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 582 i2 = 5; 583 idiag += 25; 584 for (i=1; i<m; i++) { 585 v = aa + 25*ai[i]; 586 vi = aj + ai[i]; 587 nz = diag[i] - ai[i]; 588 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 589 while (nz--) { 590 idx = 5*(*vi++); 591 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 592 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 593 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 594 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 595 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 596 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 597 v += 25; 598 } 599 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 600 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 601 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 602 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 603 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 604 idiag += 25; 605 i2 += 5; 606 } 607 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 608 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 609 } 610 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 611 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 612 i2 = 0; 613 mdiag = a->idiag+25*a->mbs; 614 for (i=0; i<m; i++) { 615 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 616 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 617 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 618 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 619 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 620 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 621 mdiag += 25; 622 i2 += 5; 623 } 624 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 625 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 626 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 627 } 628 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 629 idiag = a->idiag+25*a->mbs - 25; 630 i2 = 5*m - 5; 631 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 632 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 633 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 634 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 635 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 636 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 637 idiag -= 25; 638 i2 -= 5; 639 for (i=m-2; i>=0; i--) { 640 v = aa + 25*(diag[i]+1); 641 vi = aj + diag[i] + 1; 642 nz = ai[i+1] - diag[i] - 1; 643 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 644 while (nz--) { 645 idx = 5*(*vi++); 646 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 647 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 648 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 649 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 650 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 651 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 652 v += 25; 653 } 654 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 655 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 656 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 657 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 658 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 659 idiag -= 25; 660 i2 -= 5; 661 } 662 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 663 } 664 } else { 665 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 666 } 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 675 { 676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 677 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 678 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 679 const PetscScalar *b; 680 PetscErrorCode ierr; 681 PetscInt m = a->mbs,i,i2,nz,idx; 682 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 683 684 PetscFunctionBegin; 685 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 686 its = its*lits; 687 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 688 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 689 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 690 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 691 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 692 693 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 694 695 diag = a->diag; 696 idiag = a->idiag; 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 699 700 if (flag & SOR_ZERO_INITIAL_GUESS) { 701 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 702 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 703 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 704 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 705 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 706 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 707 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 708 i2 = 6; 709 idiag += 36; 710 for (i=1; i<m; i++) { 711 v = aa + 36*ai[i]; 712 vi = aj + ai[i]; 713 nz = diag[i] - ai[i]; 714 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 715 while (nz--) { 716 idx = 6*(*vi++); 717 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 718 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 719 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 720 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 721 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 722 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 723 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 724 v += 36; 725 } 726 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 727 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 728 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 729 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 730 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 731 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 732 idiag += 36; 733 i2 += 6; 734 } 735 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 736 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 737 } 738 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 739 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 740 i2 = 0; 741 mdiag = a->idiag+36*a->mbs; 742 for (i=0; i<m; i++) { 743 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 744 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 745 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 746 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 747 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 748 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 749 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 750 mdiag += 36; 751 i2 += 6; 752 } 753 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 754 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 755 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 756 } 757 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 758 idiag = a->idiag+36*a->mbs - 36; 759 i2 = 6*m - 6; 760 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 761 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 762 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 763 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 764 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 765 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 766 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 767 idiag -= 36; 768 i2 -= 6; 769 for (i=m-2; i>=0; i--) { 770 v = aa + 36*(diag[i]+1); 771 vi = aj + diag[i] + 1; 772 nz = ai[i+1] - diag[i] - 1; 773 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 774 while (nz--) { 775 idx = 6*(*vi++); 776 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 777 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 778 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 779 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 780 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 781 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 782 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 783 v += 36; 784 } 785 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 786 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 787 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 788 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 789 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 790 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 791 idiag -= 36; 792 i2 -= 6; 793 } 794 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 795 } 796 } else { 797 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 798 } 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 810 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 811 const PetscScalar *b; 812 PetscErrorCode ierr; 813 PetscInt m = a->mbs,i,i2,nz,idx; 814 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 815 816 PetscFunctionBegin; 817 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 818 its = its*lits; 819 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 820 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 821 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 822 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 823 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 824 825 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 826 827 diag = a->diag; 828 idiag = a->idiag; 829 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 830 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 831 832 if (flag & SOR_ZERO_INITIAL_GUESS) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 834 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 835 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 836 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 837 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 838 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 839 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 840 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 841 i2 = 7; 842 idiag += 49; 843 for (i=1; i<m; i++) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = diag[i] - ai[i]; 847 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 848 while (nz--) { 849 idx = 7*(*vi++); 850 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 851 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 852 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 853 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 854 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 855 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 856 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 857 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 858 v += 49; 859 } 860 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 861 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 862 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 863 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 864 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 865 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 866 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 867 idiag += 49; 868 i2 += 7; 869 } 870 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 871 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 872 } 873 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 874 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 875 i2 = 0; 876 mdiag = a->idiag+49*a->mbs; 877 for (i=0; i<m; i++) { 878 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 879 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 880 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 881 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 882 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 883 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 884 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 885 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 886 mdiag += 36; 887 i2 += 6; 888 } 889 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 890 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 891 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 894 idiag = a->idiag+49*a->mbs - 49; 895 i2 = 7*m - 7; 896 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 897 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 898 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 899 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 900 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 901 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 902 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 903 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 904 idiag -= 49; 905 i2 -= 7; 906 for (i=m-2; i>=0; i--) { 907 v = aa + 49*(diag[i]+1); 908 vi = aj + diag[i] + 1; 909 nz = ai[i+1] - diag[i] - 1; 910 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 911 while (nz--) { 912 idx = 7*(*vi++); 913 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 914 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 915 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 916 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 917 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 918 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 919 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 920 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 921 v += 49; 922 } 923 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 924 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 925 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 926 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 927 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 928 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 929 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 930 idiag -= 49; 931 i2 -= 7; 932 } 933 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 934 } 935 } else { 936 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 937 } 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /* 944 Special version for direct calls from Fortran (Used in PETSc-fun3d) 945 */ 946 #if defined(PETSC_HAVE_FORTRAN_CAPS) 947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 949 #define matsetvaluesblocked4_ matsetvaluesblocked4 950 #endif 951 952 EXTERN_C_BEGIN 953 #undef __FUNCT__ 954 #define __FUNCT__ "matsetvaluesblocked4_" 955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 956 { 957 Mat A = *AA; 958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 959 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 960 PetscInt *ai=a->i,*ailen=a->ilen; 961 PetscInt *aj=a->j,stepval,lastcol = -1; 962 const PetscScalar *value = v; 963 MatScalar *ap,*aa = a->a,*bap; 964 965 PetscFunctionBegin; 966 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 967 stepval = (n-1)*4; 968 for (k=0; k<m; k++) { /* loop over added rows */ 969 row = im[k]; 970 rp = aj + ai[row]; 971 ap = aa + 16*ai[row]; 972 nrow = ailen[row]; 973 low = 0; 974 high = nrow; 975 for (l=0; l<n; l++) { /* loop over added columns */ 976 col = in[l]; 977 if (col <= lastcol) low = 0; else high = nrow; 978 lastcol = col; 979 value = v + k*(stepval+4 + l)*4; 980 while (high-low > 7) { 981 t = (low+high)/2; 982 if (rp[t] > col) high = t; 983 else low = t; 984 } 985 for (i=low; i<high; i++) { 986 if (rp[i] > col) break; 987 if (rp[i] == col) { 988 bap = ap + 16*i; 989 for (ii=0; ii<4; ii++,value+=stepval) { 990 for (jj=ii; jj<16; jj+=4) { 991 bap[jj] += *value++; 992 } 993 } 994 goto noinsert2; 995 } 996 } 997 N = nrow++ - 1; 998 high++; /* added new column index thus must search to one higher than before */ 999 /* shift up all the later entries in this row */ 1000 for (ii=N; ii>=i; ii--) { 1001 rp[ii+1] = rp[ii]; 1002 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1003 } 1004 if (N >= i) { 1005 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1006 } 1007 rp[i] = col; 1008 bap = ap + 16*i; 1009 for (ii=0; ii<4; ii++,value+=stepval) { 1010 for (jj=ii; jj<16; jj+=4) { 1011 bap[jj] = *value++; 1012 } 1013 } 1014 noinsert2:; 1015 low = i; 1016 } 1017 ailen[row] = nrow; 1018 } 1019 PetscFunctionReturnVoid(); 1020 } 1021 EXTERN_C_END 1022 1023 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1024 #define matsetvalues4_ MATSETVALUES4 1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1026 #define matsetvalues4_ matsetvalues4 1027 #endif 1028 1029 EXTERN_C_BEGIN 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSetValues4_" 1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1033 { 1034 Mat A = *AA; 1035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1036 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1037 PetscInt *ai=a->i,*ailen=a->ilen; 1038 PetscInt *aj=a->j,brow,bcol; 1039 PetscInt ridx,cidx,lastcol = -1; 1040 MatScalar *ap,value,*aa=a->a,*bap; 1041 1042 PetscFunctionBegin; 1043 for (k=0; k<m; k++) { /* loop over added rows */ 1044 row = im[k]; brow = row/4; 1045 rp = aj + ai[brow]; 1046 ap = aa + 16*ai[brow]; 1047 nrow = ailen[brow]; 1048 low = 0; 1049 high = nrow; 1050 for (l=0; l<n; l++) { /* loop over added columns */ 1051 col = in[l]; bcol = col/4; 1052 ridx = row % 4; cidx = col % 4; 1053 value = v[l + k*n]; 1054 if (col <= lastcol) low = 0; else high = nrow; 1055 lastcol = col; 1056 while (high-low > 7) { 1057 t = (low+high)/2; 1058 if (rp[t] > bcol) high = t; 1059 else low = t; 1060 } 1061 for (i=low; i<high; i++) { 1062 if (rp[i] > bcol) break; 1063 if (rp[i] == bcol) { 1064 bap = ap + 16*i + 4*cidx + ridx; 1065 *bap += value; 1066 goto noinsert1; 1067 } 1068 } 1069 N = nrow++ - 1; 1070 high++; /* added new column thus must search to one higher than before */ 1071 /* shift up all the later entries in this row */ 1072 for (ii=N; ii>=i; ii--) { 1073 rp[ii+1] = rp[ii]; 1074 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1075 } 1076 if (N>=i) { 1077 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1078 } 1079 rp[i] = bcol; 1080 ap[16*i + 4*cidx + ridx] = value; 1081 noinsert1:; 1082 low = i; 1083 } 1084 ailen[brow] = nrow; 1085 } 1086 PetscFunctionReturnVoid(); 1087 } 1088 EXTERN_C_END 1089 1090 /* 1091 Checks for missing diagonals 1092 */ 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscInt *diag,*jj = a->j,i; 1100 1101 PetscFunctionBegin; 1102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1103 *missing = PETSC_FALSE; 1104 diag = a->diag; 1105 for (i=0; i<a->mbs; i++) { 1106 if (jj[diag[i]] != i) { 1107 *missing = PETSC_TRUE; 1108 if (d) *d = i; 1109 } 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscInt i,j,m = a->mbs; 1121 1122 PetscFunctionBegin; 1123 if (!a->diag) { 1124 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1125 } 1126 for (i=0; i<m; i++) { 1127 a->diag[i] = a->i[i+1]; 1128 for (j=a->i[i]; j<a->i[i+1]; j++) { 1129 if (a->j[j] == i) { 1130 a->diag[i] = j; 1131 break; 1132 } 1133 } 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 1148 PetscInt *tia, *tja; 1149 1150 PetscFunctionBegin; 1151 *nn = n; 1152 if (!ia) PetscFunctionReturn(0); 1153 if (symmetric) { 1154 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1155 } else { 1156 tia = a->i; tja = a->j; 1157 } 1158 1159 if (!blockcompressed && bs > 1) { 1160 (*nn) *= bs; 1161 nbs = bs; 1162 /* malloc & create the natural set of indices */ 1163 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1164 if (n) { 1165 (*ia)[0] = 0; 1166 for (j=1; j<bs; j++) { 1167 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1168 } 1169 } 1170 1171 for (i=1; i<n; i++) { 1172 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1173 for (j=1; j<bs; j++) { 1174 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1175 } 1176 } 1177 if (n) { 1178 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1179 } 1180 1181 if (ja) { 1182 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1183 cnt = 0; 1184 for (i=0; i<n; i++) { 1185 for (j=0; j<bs; j++) { 1186 for (k=tia[i]; k<tia[i+1]; k++) { 1187 for (l=0; l<bs; l++) { 1188 (*ja)[cnt++] = bs*tja[k] + l; 1189 } 1190 } 1191 } 1192 } 1193 } 1194 1195 n *= bs; 1196 nz *= bs*bs; 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 ierr = PetscFree(tia);CHKERRQ(ierr); 1199 ierr = PetscFree(tja);CHKERRQ(ierr); 1200 } 1201 } else { 1202 *ia = tia; 1203 if (ja) *ja = tja; 1204 } 1205 if (oshift == 1) { 1206 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1207 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt i,n = a->mbs,nz = a->i[n]; 1219 1220 PetscFunctionBegin; 1221 if (!ia) PetscFunctionReturn(0); 1222 if (!blockcompressed && A->rmap.bs > 1) { 1223 ierr = PetscFree(*ia);CHKERRQ(ierr); 1224 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1225 } else if (symmetric) { 1226 ierr = PetscFree(*ia);CHKERRQ(ierr); 1227 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1228 } else if (oshift == 1) { /* blockcompressed */ 1229 for (i=0; i<n+1; i++) a->i[i]--; 1230 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1238 { 1239 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 #if defined(PETSC_USE_LOG) 1244 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 1245 #endif 1246 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1247 if (a->row) { 1248 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1249 } 1250 if (a->col) { 1251 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1252 } 1253 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1254 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1255 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1256 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1257 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1258 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1259 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1260 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1261 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1262 1263 ierr = PetscFree(a);CHKERRQ(ierr); 1264 1265 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1266 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1278 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 switch (op) { 1285 case MAT_ROW_ORIENTED: 1286 a->roworiented = flg; 1287 break; 1288 case MAT_KEEP_ZEROED_ROWS: 1289 a->keepzeroedrows = flg; 1290 break; 1291 case MAT_NEW_NONZERO_LOCATIONS: 1292 a->nonew = (flg ? 0 : 1); 1293 break; 1294 case MAT_NEW_NONZERO_LOCATION_ERR: 1295 a->nonew = (flg ? -1 : 0); 1296 break; 1297 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1298 a->nonew = (flg ? -2 : 0); 1299 break; 1300 case MAT_NEW_DIAGONALS: 1301 case MAT_IGNORE_OFF_PROC_ENTRIES: 1302 case MAT_USE_HASH_TABLE: 1303 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1304 break; 1305 case MAT_SYMMETRIC: 1306 case MAT_STRUCTURALLY_SYMMETRIC: 1307 case MAT_HERMITIAN: 1308 case MAT_SYMMETRY_ETERNAL: 1309 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1310 break; 1311 default: 1312 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1313 } 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1319 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1320 { 1321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1322 PetscErrorCode ierr; 1323 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1324 MatScalar *aa,*aa_i; 1325 PetscScalar *v_i; 1326 1327 PetscFunctionBegin; 1328 bs = A->rmap.bs; 1329 ai = a->i; 1330 aj = a->j; 1331 aa = a->a; 1332 bs2 = a->bs2; 1333 1334 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1335 1336 bn = row/bs; /* Block number */ 1337 bp = row % bs; /* Block Position */ 1338 M = ai[bn+1] - ai[bn]; 1339 *nz = bs*M; 1340 1341 if (v) { 1342 *v = 0; 1343 if (*nz) { 1344 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1345 for (i=0; i<M; i++) { /* for each block in the block row */ 1346 v_i = *v + i*bs; 1347 aa_i = aa + bs2*(ai[bn] + i); 1348 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1349 } 1350 } 1351 } 1352 1353 if (idx) { 1354 *idx = 0; 1355 if (*nz) { 1356 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1357 for (i=0; i<M; i++) { /* for each block in the block row */ 1358 idx_i = *idx + i*bs; 1359 itmp = bs*aj[ai[bn] + i]; 1360 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1361 } 1362 } 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1369 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1370 { 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1375 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1381 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1382 { 1383 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1384 Mat C; 1385 PetscErrorCode ierr; 1386 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1387 PetscInt *rows,*cols,bs2=a->bs2; 1388 MatScalar *array; 1389 1390 PetscFunctionBegin; 1391 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1392 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1393 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1394 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1395 1396 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1397 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1398 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1399 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1400 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1401 ierr = PetscFree(col);CHKERRQ(ierr); 1402 } else { 1403 C = *B; 1404 } 1405 1406 array = a->a; 1407 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1408 cols = rows + bs; 1409 for (i=0; i<mbs; i++) { 1410 cols[0] = i*bs; 1411 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1412 len = ai[i+1] - ai[i]; 1413 for (j=0; j<len; j++) { 1414 rows[0] = (*aj++)*bs; 1415 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1416 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1417 array += bs2; 1418 } 1419 } 1420 ierr = PetscFree(rows);CHKERRQ(ierr); 1421 1422 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1423 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1424 1425 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1426 *B = C; 1427 } else { 1428 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1435 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1436 { 1437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1438 PetscErrorCode ierr; 1439 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1440 int fd; 1441 PetscScalar *aa; 1442 FILE *file; 1443 1444 PetscFunctionBegin; 1445 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1446 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1447 col_lens[0] = MAT_FILE_COOKIE; 1448 1449 col_lens[1] = A->rmap.N; 1450 col_lens[2] = A->cmap.n; 1451 col_lens[3] = a->nz*bs2; 1452 1453 /* store lengths of each row and write (including header) to file */ 1454 count = 0; 1455 for (i=0; i<a->mbs; i++) { 1456 for (j=0; j<bs; j++) { 1457 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1458 } 1459 } 1460 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1461 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1462 1463 /* store column indices (zero start index) */ 1464 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1465 count = 0; 1466 for (i=0; i<a->mbs; i++) { 1467 for (j=0; j<bs; j++) { 1468 for (k=a->i[i]; k<a->i[i+1]; k++) { 1469 for (l=0; l<bs; l++) { 1470 jj[count++] = bs*a->j[k] + l; 1471 } 1472 } 1473 } 1474 } 1475 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1476 ierr = PetscFree(jj);CHKERRQ(ierr); 1477 1478 /* store nonzero values */ 1479 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1480 count = 0; 1481 for (i=0; i<a->mbs; i++) { 1482 for (j=0; j<bs; j++) { 1483 for (k=a->i[i]; k<a->i[i+1]; k++) { 1484 for (l=0; l<bs; l++) { 1485 aa[count++] = a->a[bs2*k + l*bs + j]; 1486 } 1487 } 1488 } 1489 } 1490 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1491 ierr = PetscFree(aa);CHKERRQ(ierr); 1492 1493 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1494 if (file) { 1495 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1496 } 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1502 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1503 { 1504 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1505 PetscErrorCode ierr; 1506 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1507 PetscViewerFormat format; 1508 1509 PetscFunctionBegin; 1510 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1511 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1512 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1513 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1514 Mat aij; 1515 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1516 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1517 ierr = MatDestroy(aij);CHKERRQ(ierr); 1518 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1519 PetscFunctionReturn(0); 1520 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1521 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1522 for (i=0; i<a->mbs; i++) { 1523 for (j=0; j<bs; j++) { 1524 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1525 for (k=a->i[i]; k<a->i[i+1]; k++) { 1526 for (l=0; l<bs; l++) { 1527 #if defined(PETSC_USE_COMPLEX) 1528 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1529 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1530 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1531 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1532 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1533 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1534 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1535 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } 1537 #else 1538 if (a->a[bs2*k + l*bs + j] != 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1540 } 1541 #endif 1542 } 1543 } 1544 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1545 } 1546 } 1547 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1548 } else { 1549 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1550 for (i=0; i<a->mbs; i++) { 1551 for (j=0; j<bs; j++) { 1552 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1553 for (k=a->i[i]; k<a->i[i+1]; k++) { 1554 for (l=0; l<bs; l++) { 1555 #if defined(PETSC_USE_COMPLEX) 1556 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1557 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1558 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1559 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1560 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1561 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1562 } else { 1563 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1564 } 1565 #else 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1567 #endif 1568 } 1569 } 1570 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1571 } 1572 } 1573 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1574 } 1575 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1581 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1582 { 1583 Mat A = (Mat) Aa; 1584 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1585 PetscErrorCode ierr; 1586 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1587 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1588 MatScalar *aa; 1589 PetscViewer viewer; 1590 1591 PetscFunctionBegin; 1592 1593 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1594 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1595 1596 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1597 1598 /* loop over matrix elements drawing boxes */ 1599 color = PETSC_DRAW_BLUE; 1600 for (i=0,row=0; i<mbs; i++,row+=bs) { 1601 for (j=a->i[i]; j<a->i[i+1]; j++) { 1602 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1603 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1604 aa = a->a + j*bs2; 1605 for (k=0; k<bs; k++) { 1606 for (l=0; l<bs; l++) { 1607 if (PetscRealPart(*aa++) >= 0.) continue; 1608 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1609 } 1610 } 1611 } 1612 } 1613 color = PETSC_DRAW_CYAN; 1614 for (i=0,row=0; i<mbs; i++,row+=bs) { 1615 for (j=a->i[i]; j<a->i[i+1]; j++) { 1616 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1617 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1618 aa = a->a + j*bs2; 1619 for (k=0; k<bs; k++) { 1620 for (l=0; l<bs; l++) { 1621 if (PetscRealPart(*aa++) != 0.) continue; 1622 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1623 } 1624 } 1625 } 1626 } 1627 1628 color = PETSC_DRAW_RED; 1629 for (i=0,row=0; i<mbs; i++,row+=bs) { 1630 for (j=a->i[i]; j<a->i[i+1]; j++) { 1631 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1632 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1633 aa = a->a + j*bs2; 1634 for (k=0; k<bs; k++) { 1635 for (l=0; l<bs; l++) { 1636 if (PetscRealPart(*aa++) <= 0.) continue; 1637 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1638 } 1639 } 1640 } 1641 } 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1647 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1648 { 1649 PetscErrorCode ierr; 1650 PetscReal xl,yl,xr,yr,w,h; 1651 PetscDraw draw; 1652 PetscTruth isnull; 1653 1654 PetscFunctionBegin; 1655 1656 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1657 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1658 1659 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1660 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1661 xr += w; yr += h; xl = -w; yl = -h; 1662 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1663 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1664 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatView_SeqBAIJ" 1670 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1671 { 1672 PetscErrorCode ierr; 1673 PetscTruth iascii,isbinary,isdraw; 1674 1675 PetscFunctionBegin; 1676 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1677 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1678 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1679 if (iascii){ 1680 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1681 } else if (isbinary) { 1682 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1683 } else if (isdraw) { 1684 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1685 } else { 1686 Mat B; 1687 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1688 ierr = MatView(B,viewer);CHKERRQ(ierr); 1689 ierr = MatDestroy(B);CHKERRQ(ierr); 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1697 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1698 { 1699 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1700 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1701 PetscInt *ai = a->i,*ailen = a->ilen; 1702 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1703 MatScalar *ap,*aa = a->a; 1704 1705 PetscFunctionBegin; 1706 for (k=0; k<m; k++) { /* loop over rows */ 1707 row = im[k]; brow = row/bs; 1708 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1709 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1710 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1711 nrow = ailen[brow]; 1712 for (l=0; l<n; l++) { /* loop over columns */ 1713 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1714 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1715 col = in[l] ; 1716 bcol = col/bs; 1717 cidx = col%bs; 1718 ridx = row%bs; 1719 high = nrow; 1720 low = 0; /* assume unsorted */ 1721 while (high-low > 5) { 1722 t = (low+high)/2; 1723 if (rp[t] > bcol) high = t; 1724 else low = t; 1725 } 1726 for (i=low; i<high; i++) { 1727 if (rp[i] > bcol) break; 1728 if (rp[i] == bcol) { 1729 *v++ = ap[bs2*i+bs*cidx+ridx]; 1730 goto finished; 1731 } 1732 } 1733 *v++ = 0.0; 1734 finished:; 1735 } 1736 } 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #define CHUNKSIZE 10 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1743 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1744 { 1745 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1746 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1747 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1748 PetscErrorCode ierr; 1749 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1750 PetscTruth roworiented=a->roworiented; 1751 const PetscScalar *value = v; 1752 MatScalar *ap,*aa = a->a,*bap; 1753 1754 PetscFunctionBegin; 1755 if (roworiented) { 1756 stepval = (n-1)*bs; 1757 } else { 1758 stepval = (m-1)*bs; 1759 } 1760 for (k=0; k<m; k++) { /* loop over added rows */ 1761 row = im[k]; 1762 if (row < 0) continue; 1763 #if defined(PETSC_USE_DEBUG) 1764 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1765 #endif 1766 rp = aj + ai[row]; 1767 ap = aa + bs2*ai[row]; 1768 rmax = imax[row]; 1769 nrow = ailen[row]; 1770 low = 0; 1771 high = nrow; 1772 for (l=0; l<n; l++) { /* loop over added columns */ 1773 if (in[l] < 0) continue; 1774 #if defined(PETSC_USE_DEBUG) 1775 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1776 #endif 1777 col = in[l]; 1778 if (roworiented) { 1779 value = v + k*(stepval+bs)*bs + l*bs; 1780 } else { 1781 value = v + l*(stepval+bs)*bs + k*bs; 1782 } 1783 if (col <= lastcol) low = 0; else high = nrow; 1784 lastcol = col; 1785 while (high-low > 7) { 1786 t = (low+high)/2; 1787 if (rp[t] > col) high = t; 1788 else low = t; 1789 } 1790 for (i=low; i<high; i++) { 1791 if (rp[i] > col) break; 1792 if (rp[i] == col) { 1793 bap = ap + bs2*i; 1794 if (roworiented) { 1795 if (is == ADD_VALUES) { 1796 for (ii=0; ii<bs; ii++,value+=stepval) { 1797 for (jj=ii; jj<bs2; jj+=bs) { 1798 bap[jj] += *value++; 1799 } 1800 } 1801 } else { 1802 for (ii=0; ii<bs; ii++,value+=stepval) { 1803 for (jj=ii; jj<bs2; jj+=bs) { 1804 bap[jj] = *value++; 1805 } 1806 } 1807 } 1808 } else { 1809 if (is == ADD_VALUES) { 1810 for (ii=0; ii<bs; ii++,value+=stepval) { 1811 for (jj=0; jj<bs; jj++) { 1812 *bap++ += *value++; 1813 } 1814 } 1815 } else { 1816 for (ii=0; ii<bs; ii++,value+=stepval) { 1817 for (jj=0; jj<bs; jj++) { 1818 *bap++ = *value++; 1819 } 1820 } 1821 } 1822 } 1823 goto noinsert2; 1824 } 1825 } 1826 if (nonew == 1) goto noinsert2; 1827 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1828 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1829 N = nrow++ - 1; high++; 1830 /* shift up all the later entries in this row */ 1831 for (ii=N; ii>=i; ii--) { 1832 rp[ii+1] = rp[ii]; 1833 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1834 } 1835 if (N >= i) { 1836 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1837 } 1838 rp[i] = col; 1839 bap = ap + bs2*i; 1840 if (roworiented) { 1841 for (ii=0; ii<bs; ii++,value+=stepval) { 1842 for (jj=ii; jj<bs2; jj+=bs) { 1843 bap[jj] = *value++; 1844 } 1845 } 1846 } else { 1847 for (ii=0; ii<bs; ii++,value+=stepval) { 1848 for (jj=0; jj<bs; jj++) { 1849 *bap++ = *value++; 1850 } 1851 } 1852 } 1853 noinsert2:; 1854 low = i; 1855 } 1856 ailen[row] = nrow; 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1863 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1864 { 1865 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1866 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1867 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1868 PetscErrorCode ierr; 1869 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1870 MatScalar *aa = a->a,*ap; 1871 PetscReal ratio=0.6; 1872 1873 PetscFunctionBegin; 1874 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1875 1876 if (m) rmax = ailen[0]; 1877 for (i=1; i<mbs; i++) { 1878 /* move each row back by the amount of empty slots (fshift) before it*/ 1879 fshift += imax[i-1] - ailen[i-1]; 1880 rmax = PetscMax(rmax,ailen[i]); 1881 if (fshift) { 1882 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1883 N = ailen[i]; 1884 for (j=0; j<N; j++) { 1885 ip[j-fshift] = ip[j]; 1886 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1887 } 1888 } 1889 ai[i] = ai[i-1] + ailen[i-1]; 1890 } 1891 if (mbs) { 1892 fshift += imax[mbs-1] - ailen[mbs-1]; 1893 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1894 } 1895 /* reset ilen and imax for each row */ 1896 for (i=0; i<mbs; i++) { 1897 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1898 } 1899 a->nz = ai[mbs]; 1900 1901 /* diagonals may have moved, so kill the diagonal pointers */ 1902 a->idiagvalid = PETSC_FALSE; 1903 if (fshift && a->diag) { 1904 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1905 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1906 a->diag = 0; 1907 } 1908 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1909 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1910 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1911 a->reallocs = 0; 1912 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1913 1914 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1915 if (a->compressedrow.use){ 1916 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1917 } 1918 1919 A->same_nonzero = PETSC_TRUE; 1920 PetscFunctionReturn(0); 1921 } 1922 1923 /* 1924 This function returns an array of flags which indicate the locations of contiguous 1925 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1926 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1927 Assume: sizes should be long enough to hold all the values. 1928 */ 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1931 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1932 { 1933 PetscInt i,j,k,row; 1934 PetscTruth flg; 1935 1936 PetscFunctionBegin; 1937 for (i=0,j=0; i<n; j++) { 1938 row = idx[i]; 1939 if (row%bs!=0) { /* Not the begining of a block */ 1940 sizes[j] = 1; 1941 i++; 1942 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1943 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1944 i++; 1945 } else { /* Begining of the block, so check if the complete block exists */ 1946 flg = PETSC_TRUE; 1947 for (k=1; k<bs; k++) { 1948 if (row+k != idx[i+k]) { /* break in the block */ 1949 flg = PETSC_FALSE; 1950 break; 1951 } 1952 } 1953 if (flg) { /* No break in the bs */ 1954 sizes[j] = bs; 1955 i+= bs; 1956 } else { 1957 sizes[j] = 1; 1958 i++; 1959 } 1960 } 1961 } 1962 *bs_max = j; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1968 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1969 { 1970 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1971 PetscErrorCode ierr; 1972 PetscInt i,j,k,count,*rows; 1973 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1974 PetscScalar zero = 0.0; 1975 MatScalar *aa; 1976 1977 PetscFunctionBegin; 1978 /* Make a copy of the IS and sort it */ 1979 /* allocate memory for rows,sizes */ 1980 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1981 sizes = rows + is_n; 1982 1983 /* copy IS values to rows, and sort them */ 1984 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1985 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1986 if (baij->keepzeroedrows) { 1987 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1988 bs_max = is_n; 1989 A->same_nonzero = PETSC_TRUE; 1990 } else { 1991 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1992 A->same_nonzero = PETSC_FALSE; 1993 } 1994 1995 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1996 row = rows[j]; 1997 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1998 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1999 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2000 if (sizes[i] == bs && !baij->keepzeroedrows) { 2001 if (diag != 0.0) { 2002 if (baij->ilen[row/bs] > 0) { 2003 baij->ilen[row/bs] = 1; 2004 baij->j[baij->i[row/bs]] = row/bs; 2005 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2006 } 2007 /* Now insert all the diagonal values for this bs */ 2008 for (k=0; k<bs; k++) { 2009 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2010 } 2011 } else { /* (diag == 0.0) */ 2012 baij->ilen[row/bs] = 0; 2013 } /* end (diag == 0.0) */ 2014 } else { /* (sizes[i] != bs) */ 2015 #if defined (PETSC_USE_DEBUG) 2016 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2017 #endif 2018 for (k=0; k<count; k++) { 2019 aa[0] = zero; 2020 aa += bs; 2021 } 2022 if (diag != 0.0) { 2023 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2024 } 2025 } 2026 } 2027 2028 ierr = PetscFree(rows);CHKERRQ(ierr); 2029 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2035 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2036 { 2037 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2038 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2039 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2040 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 2041 PetscErrorCode ierr; 2042 PetscInt ridx,cidx,bs2=a->bs2; 2043 PetscTruth roworiented=a->roworiented; 2044 MatScalar *ap,value,*aa=a->a,*bap; 2045 2046 PetscFunctionBegin; 2047 for (k=0; k<m; k++) { /* loop over added rows */ 2048 row = im[k]; 2049 brow = row/bs; 2050 if (row < 0) continue; 2051 #if defined(PETSC_USE_DEBUG) 2052 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 2053 #endif 2054 rp = aj + ai[brow]; 2055 ap = aa + bs2*ai[brow]; 2056 rmax = imax[brow]; 2057 nrow = ailen[brow]; 2058 low = 0; 2059 high = nrow; 2060 for (l=0; l<n; l++) { /* loop over added columns */ 2061 if (in[l] < 0) continue; 2062 #if defined(PETSC_USE_DEBUG) 2063 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 2064 #endif 2065 col = in[l]; bcol = col/bs; 2066 ridx = row % bs; cidx = col % bs; 2067 if (roworiented) { 2068 value = v[l + k*n]; 2069 } else { 2070 value = v[k + l*m]; 2071 } 2072 if (col <= lastcol) low = 0; else high = nrow; 2073 lastcol = col; 2074 while (high-low > 7) { 2075 t = (low+high)/2; 2076 if (rp[t] > bcol) high = t; 2077 else low = t; 2078 } 2079 for (i=low; i<high; i++) { 2080 if (rp[i] > bcol) break; 2081 if (rp[i] == bcol) { 2082 bap = ap + bs2*i + bs*cidx + ridx; 2083 if (is == ADD_VALUES) *bap += value; 2084 else *bap = value; 2085 goto noinsert1; 2086 } 2087 } 2088 if (nonew == 1) goto noinsert1; 2089 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2090 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2091 N = nrow++ - 1; high++; 2092 /* shift up all the later entries in this row */ 2093 for (ii=N; ii>=i; ii--) { 2094 rp[ii+1] = rp[ii]; 2095 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2096 } 2097 if (N>=i) { 2098 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2099 } 2100 rp[i] = bcol; 2101 ap[bs2*i + bs*cidx + ridx] = value; 2102 a->nz++; 2103 noinsert1:; 2104 low = i; 2105 } 2106 ailen[brow] = nrow; 2107 } 2108 A->same_nonzero = PETSC_FALSE; 2109 PetscFunctionReturn(0); 2110 } 2111 2112 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2115 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 2116 { 2117 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2118 Mat outA; 2119 PetscErrorCode ierr; 2120 PetscTruth row_identity,col_identity; 2121 2122 PetscFunctionBegin; 2123 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2124 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2125 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2126 if (!row_identity || !col_identity) { 2127 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2128 } 2129 2130 outA = inA; 2131 inA->factor = FACTOR_LU; 2132 2133 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2134 2135 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2136 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2137 a->row = row; 2138 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2139 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2140 a->col = col; 2141 2142 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2143 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2144 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2145 2146 /* 2147 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 2148 for ILU(0) factorization with natural ordering 2149 */ 2150 if (inA->rmap.bs < 8) { 2151 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 2152 } else { 2153 if (!a->solve_work) { 2154 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2155 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2156 } 2157 } 2158 2159 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 2160 2161 PetscFunctionReturn(0); 2162 } 2163 2164 EXTERN_C_BEGIN 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2167 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2168 { 2169 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2170 PetscInt i,nz,nbs; 2171 2172 PetscFunctionBegin; 2173 nz = baij->maxnz/baij->bs2; 2174 nbs = baij->nbs; 2175 for (i=0; i<nz; i++) { 2176 baij->j[i] = indices[i]; 2177 } 2178 baij->nz = nz; 2179 for (i=0; i<nbs; i++) { 2180 baij->ilen[i] = baij->imax[i]; 2181 } 2182 2183 PetscFunctionReturn(0); 2184 } 2185 EXTERN_C_END 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2189 /*@ 2190 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2191 in the matrix. 2192 2193 Input Parameters: 2194 + mat - the SeqBAIJ matrix 2195 - indices - the column indices 2196 2197 Level: advanced 2198 2199 Notes: 2200 This can be called if you have precomputed the nonzero structure of the 2201 matrix and want to provide it to the matrix object to improve the performance 2202 of the MatSetValues() operation. 2203 2204 You MUST have set the correct numbers of nonzeros per row in the call to 2205 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2206 2207 MUST be called before any calls to MatSetValues(); 2208 2209 @*/ 2210 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2211 { 2212 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2213 2214 PetscFunctionBegin; 2215 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2216 PetscValidPointer(indices,2); 2217 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2218 if (f) { 2219 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2220 } else { 2221 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2222 } 2223 PetscFunctionReturn(0); 2224 } 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2228 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2229 { 2230 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2231 PetscErrorCode ierr; 2232 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2233 PetscReal atmp; 2234 PetscScalar *x,zero = 0.0; 2235 MatScalar *aa; 2236 PetscInt ncols,brow,krow,kcol; 2237 2238 PetscFunctionBegin; 2239 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2240 bs = A->rmap.bs; 2241 aa = a->a; 2242 ai = a->i; 2243 aj = a->j; 2244 mbs = a->mbs; 2245 2246 ierr = VecSet(v,zero);CHKERRQ(ierr); 2247 if (idx) { 2248 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 2249 } 2250 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2251 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2252 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2253 for (i=0; i<mbs; i++) { 2254 ncols = ai[1] - ai[0]; ai++; 2255 brow = bs*i; 2256 for (j=0; j<ncols; j++){ 2257 for (kcol=0; kcol<bs; kcol++){ 2258 for (krow=0; krow<bs; krow++){ 2259 atmp = PetscAbsScalar(*aa);aa++; 2260 row = brow + krow; /* row index */ 2261 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2262 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2263 } 2264 } 2265 aj++; 2266 } 2267 } 2268 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatCopy_SeqBAIJ" 2274 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2275 { 2276 PetscErrorCode ierr; 2277 2278 PetscFunctionBegin; 2279 /* If the two matrices have the same copy implementation, use fast copy. */ 2280 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2281 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2282 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2283 2284 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 2285 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2286 } 2287 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 2288 } else { 2289 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2296 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2297 { 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2307 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2308 { 2309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2310 PetscFunctionBegin; 2311 *array = a->a; 2312 PetscFunctionReturn(0); 2313 } 2314 2315 #undef __FUNCT__ 2316 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2317 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2318 { 2319 PetscFunctionBegin; 2320 PetscFunctionReturn(0); 2321 } 2322 2323 #include "petscblaslapack.h" 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2326 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2327 { 2328 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2329 PetscErrorCode ierr; 2330 PetscInt i,bs=Y->rmap.bs,j,bs2; 2331 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2332 2333 PetscFunctionBegin; 2334 if (str == SAME_NONZERO_PATTERN) { 2335 PetscScalar alpha = a; 2336 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2337 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2338 if (y->xtoy && y->XtoY != X) { 2339 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2340 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2341 } 2342 if (!y->xtoy) { /* get xtoy */ 2343 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2344 y->XtoY = X; 2345 } 2346 bs2 = bs*bs; 2347 for (i=0; i<x->nz; i++) { 2348 j = 0; 2349 while (j < bs2){ 2350 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2351 j++; 2352 } 2353 } 2354 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2355 } else { 2356 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2357 } 2358 PetscFunctionReturn(0); 2359 } 2360 2361 #undef __FUNCT__ 2362 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2363 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2364 { 2365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2366 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2367 MatScalar *aa = a->a; 2368 2369 PetscFunctionBegin; 2370 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2376 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2377 { 2378 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2379 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2380 MatScalar *aa = a->a; 2381 2382 PetscFunctionBegin; 2383 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 2388 /* -------------------------------------------------------------------*/ 2389 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2390 MatGetRow_SeqBAIJ, 2391 MatRestoreRow_SeqBAIJ, 2392 MatMult_SeqBAIJ_N, 2393 /* 4*/ MatMultAdd_SeqBAIJ_N, 2394 MatMultTranspose_SeqBAIJ, 2395 MatMultTransposeAdd_SeqBAIJ, 2396 MatSolve_SeqBAIJ_N, 2397 0, 2398 0, 2399 /*10*/ 0, 2400 MatLUFactor_SeqBAIJ, 2401 0, 2402 0, 2403 MatTranspose_SeqBAIJ, 2404 /*15*/ MatGetInfo_SeqBAIJ, 2405 MatEqual_SeqBAIJ, 2406 MatGetDiagonal_SeqBAIJ, 2407 MatDiagonalScale_SeqBAIJ, 2408 MatNorm_SeqBAIJ, 2409 /*20*/ 0, 2410 MatAssemblyEnd_SeqBAIJ, 2411 0, 2412 MatSetOption_SeqBAIJ, 2413 MatZeroEntries_SeqBAIJ, 2414 /*25*/ MatZeroRows_SeqBAIJ, 2415 MatLUFactorSymbolic_SeqBAIJ, 2416 MatLUFactorNumeric_SeqBAIJ_N, 2417 MatCholeskyFactorSymbolic_SeqBAIJ, 2418 MatCholeskyFactorNumeric_SeqBAIJ_N, 2419 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2420 MatILUFactorSymbolic_SeqBAIJ, 2421 MatICCFactorSymbolic_SeqBAIJ, 2422 MatGetArray_SeqBAIJ, 2423 MatRestoreArray_SeqBAIJ, 2424 /*35*/ MatDuplicate_SeqBAIJ, 2425 0, 2426 0, 2427 MatILUFactor_SeqBAIJ, 2428 0, 2429 /*40*/ MatAXPY_SeqBAIJ, 2430 MatGetSubMatrices_SeqBAIJ, 2431 MatIncreaseOverlap_SeqBAIJ, 2432 MatGetValues_SeqBAIJ, 2433 MatCopy_SeqBAIJ, 2434 /*45*/ 0, 2435 MatScale_SeqBAIJ, 2436 0, 2437 0, 2438 0, 2439 /*50*/ 0, 2440 MatGetRowIJ_SeqBAIJ, 2441 MatRestoreRowIJ_SeqBAIJ, 2442 0, 2443 0, 2444 /*55*/ 0, 2445 0, 2446 0, 2447 0, 2448 MatSetValuesBlocked_SeqBAIJ, 2449 /*60*/ MatGetSubMatrix_SeqBAIJ, 2450 MatDestroy_SeqBAIJ, 2451 MatView_SeqBAIJ, 2452 0, 2453 0, 2454 /*65*/ 0, 2455 0, 2456 0, 2457 0, 2458 0, 2459 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2460 MatConvert_Basic, 2461 0, 2462 0, 2463 0, 2464 /*75*/ 0, 2465 0, 2466 0, 2467 0, 2468 0, 2469 /*80*/ 0, 2470 0, 2471 0, 2472 0, 2473 MatLoad_SeqBAIJ, 2474 /*85*/ 0, 2475 0, 2476 0, 2477 0, 2478 0, 2479 /*90*/ 0, 2480 0, 2481 0, 2482 0, 2483 0, 2484 /*95*/ 0, 2485 0, 2486 0, 2487 0, 2488 0, 2489 /*100*/0, 2490 0, 2491 0, 2492 0, 2493 0, 2494 /*105*/0, 2495 MatRealPart_SeqBAIJ, 2496 MatImaginaryPart_SeqBAIJ, 2497 0, 2498 0, 2499 /*110*/0, 2500 0, 2501 0, 2502 0, 2503 MatMissingDiagonal_SeqBAIJ 2504 /*115*/ 2505 }; 2506 2507 EXTERN_C_BEGIN 2508 #undef __FUNCT__ 2509 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2510 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2511 { 2512 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2513 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2514 PetscErrorCode ierr; 2515 2516 PetscFunctionBegin; 2517 if (aij->nonew != 1) { 2518 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2519 } 2520 2521 /* allocate space for values if not already there */ 2522 if (!aij->saved_values) { 2523 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2524 } 2525 2526 /* copy values over */ 2527 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2528 PetscFunctionReturn(0); 2529 } 2530 EXTERN_C_END 2531 2532 EXTERN_C_BEGIN 2533 #undef __FUNCT__ 2534 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2535 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2536 { 2537 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2538 PetscErrorCode ierr; 2539 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2540 2541 PetscFunctionBegin; 2542 if (aij->nonew != 1) { 2543 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2544 } 2545 if (!aij->saved_values) { 2546 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2547 } 2548 2549 /* copy values over */ 2550 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 EXTERN_C_END 2554 2555 EXTERN_C_BEGIN 2556 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2557 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2558 EXTERN_C_END 2559 2560 EXTERN_C_BEGIN 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2563 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2564 { 2565 Mat_SeqBAIJ *b; 2566 PetscErrorCode ierr; 2567 PetscInt i,mbs,nbs,bs2,newbs = bs; 2568 PetscTruth flg,skipallocation = PETSC_FALSE; 2569 2570 PetscFunctionBegin; 2571 2572 if (nz == MAT_SKIP_ALLOCATION) { 2573 skipallocation = PETSC_TRUE; 2574 nz = 0; 2575 } 2576 2577 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2578 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2579 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2580 2581 if (nnz && newbs != bs) { 2582 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2583 } 2584 bs = newbs; 2585 2586 B->rmap.bs = B->cmap.bs = bs; 2587 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2588 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2589 2590 B->preallocated = PETSC_TRUE; 2591 2592 mbs = B->rmap.n/bs; 2593 nbs = B->cmap.n/bs; 2594 bs2 = bs*bs; 2595 2596 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2597 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2598 } 2599 2600 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2601 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2602 if (nnz) { 2603 for (i=0; i<mbs; i++) { 2604 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2605 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2606 } 2607 } 2608 2609 b = (Mat_SeqBAIJ*)B->data; 2610 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2611 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2612 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2613 2614 B->ops->solve = MatSolve_SeqBAIJ_Update; 2615 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2616 if (!flg) { 2617 switch (bs) { 2618 case 1: 2619 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2620 B->ops->mult = MatMult_SeqBAIJ_1; 2621 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2622 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2623 break; 2624 case 2: 2625 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2626 B->ops->mult = MatMult_SeqBAIJ_2; 2627 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2628 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2629 break; 2630 case 3: 2631 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2632 B->ops->mult = MatMult_SeqBAIJ_3; 2633 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2634 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2635 break; 2636 case 4: 2637 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2638 B->ops->mult = MatMult_SeqBAIJ_4; 2639 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2640 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2641 break; 2642 case 5: 2643 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2644 B->ops->mult = MatMult_SeqBAIJ_5; 2645 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2646 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2647 break; 2648 case 6: 2649 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2650 B->ops->mult = MatMult_SeqBAIJ_6; 2651 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2652 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2653 break; 2654 case 7: 2655 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2656 B->ops->mult = MatMult_SeqBAIJ_7; 2657 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2658 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2659 break; 2660 default: 2661 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2662 B->ops->mult = MatMult_SeqBAIJ_N; 2663 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2664 break; 2665 } 2666 } 2667 B->rmap.bs = bs; 2668 b->mbs = mbs; 2669 b->nbs = nbs; 2670 if (!skipallocation) { 2671 if (!b->imax) { 2672 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2673 } 2674 /* b->ilen will count nonzeros in each block row so far. */ 2675 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2676 if (!nnz) { 2677 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2678 else if (nz <= 0) nz = 1; 2679 for (i=0; i<mbs; i++) b->imax[i] = nz; 2680 nz = nz*mbs; 2681 } else { 2682 nz = 0; 2683 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2684 } 2685 2686 /* allocate the matrix space */ 2687 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2688 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2689 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2690 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2691 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2692 b->singlemalloc = PETSC_TRUE; 2693 b->i[0] = 0; 2694 for (i=1; i<mbs+1; i++) { 2695 b->i[i] = b->i[i-1] + b->imax[i-1]; 2696 } 2697 b->free_a = PETSC_TRUE; 2698 b->free_ij = PETSC_TRUE; 2699 } else { 2700 b->free_a = PETSC_FALSE; 2701 b->free_ij = PETSC_FALSE; 2702 } 2703 2704 B->rmap.bs = bs; 2705 b->bs2 = bs2; 2706 b->mbs = mbs; 2707 b->nz = 0; 2708 b->maxnz = nz*bs2; 2709 B->info.nz_unneeded = (PetscReal)b->maxnz; 2710 PetscFunctionReturn(0); 2711 } 2712 EXTERN_C_END 2713 2714 /*MC 2715 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2716 block sparse compressed row format. 2717 2718 Options Database Keys: 2719 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2720 2721 Level: beginner 2722 2723 .seealso: MatCreateSeqBAIJ() 2724 M*/ 2725 2726 EXTERN_C_BEGIN 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatCreate_SeqBAIJ" 2729 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2730 { 2731 PetscErrorCode ierr; 2732 PetscMPIInt size; 2733 Mat_SeqBAIJ *b; 2734 2735 PetscFunctionBegin; 2736 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2737 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2738 2739 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2740 B->data = (void*)b; 2741 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2742 B->factor = 0; 2743 B->mapping = 0; 2744 b->row = 0; 2745 b->col = 0; 2746 b->icol = 0; 2747 b->reallocs = 0; 2748 b->saved_values = 0; 2749 2750 b->roworiented = PETSC_TRUE; 2751 b->nonew = 0; 2752 b->diag = 0; 2753 b->solve_work = 0; 2754 b->mult_work = 0; 2755 B->spptr = 0; 2756 B->info.nz_unneeded = (PetscReal)b->maxnz; 2757 b->keepzeroedrows = PETSC_FALSE; 2758 b->xtoy = 0; 2759 b->XtoY = 0; 2760 b->compressedrow.use = PETSC_FALSE; 2761 b->compressedrow.nrows = 0; 2762 b->compressedrow.i = PETSC_NULL; 2763 b->compressedrow.rindex = PETSC_NULL; 2764 b->compressedrow.checked = PETSC_FALSE; 2765 B->same_nonzero = PETSC_FALSE; 2766 2767 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2768 "MatInvertBlockDiagonal_SeqBAIJ", 2769 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2770 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2771 "MatStoreValues_SeqBAIJ", 2772 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2773 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2774 "MatRetrieveValues_SeqBAIJ", 2775 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2776 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2777 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2778 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2779 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2780 "MatConvert_SeqBAIJ_SeqAIJ", 2781 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2782 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2783 "MatConvert_SeqBAIJ_SeqSBAIJ", 2784 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2785 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2786 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2787 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2788 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2789 PetscFunctionReturn(0); 2790 } 2791 EXTERN_C_END 2792 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2795 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2796 { 2797 Mat C; 2798 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2799 PetscErrorCode ierr; 2800 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2801 2802 PetscFunctionBegin; 2803 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2804 2805 *B = 0; 2806 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2807 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2808 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2809 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2810 c = (Mat_SeqBAIJ*)C->data; 2811 2812 C->rmap.N = A->rmap.N; 2813 C->cmap.N = A->cmap.N; 2814 C->rmap.bs = A->rmap.bs; 2815 c->bs2 = a->bs2; 2816 c->mbs = a->mbs; 2817 c->nbs = a->nbs; 2818 2819 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2820 for (i=0; i<mbs; i++) { 2821 c->imax[i] = a->imax[i]; 2822 c->ilen[i] = a->ilen[i]; 2823 } 2824 2825 /* allocate the matrix space */ 2826 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2827 c->singlemalloc = PETSC_TRUE; 2828 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2829 if (mbs > 0) { 2830 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2831 if (cpvalues == MAT_COPY_VALUES) { 2832 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2833 } else { 2834 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2835 } 2836 } 2837 c->roworiented = a->roworiented; 2838 c->nonew = a->nonew; 2839 2840 if (a->diag) { 2841 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2842 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2843 for (i=0; i<mbs; i++) { 2844 c->diag[i] = a->diag[i]; 2845 } 2846 } else c->diag = 0; 2847 c->nz = a->nz; 2848 c->maxnz = a->maxnz; 2849 c->solve_work = 0; 2850 c->mult_work = 0; 2851 c->free_a = PETSC_TRUE; 2852 c->free_ij = PETSC_TRUE; 2853 C->preallocated = PETSC_TRUE; 2854 C->assembled = PETSC_TRUE; 2855 2856 c->compressedrow.use = a->compressedrow.use; 2857 c->compressedrow.nrows = a->compressedrow.nrows; 2858 c->compressedrow.checked = a->compressedrow.checked; 2859 if ( a->compressedrow.checked && a->compressedrow.use){ 2860 i = a->compressedrow.nrows; 2861 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2862 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2863 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2864 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2865 } else { 2866 c->compressedrow.use = PETSC_FALSE; 2867 c->compressedrow.i = PETSC_NULL; 2868 c->compressedrow.rindex = PETSC_NULL; 2869 } 2870 C->same_nonzero = A->same_nonzero; 2871 *B = C; 2872 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2873 PetscFunctionReturn(0); 2874 } 2875 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "MatLoad_SeqBAIJ" 2878 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2879 { 2880 Mat_SeqBAIJ *a; 2881 Mat B; 2882 PetscErrorCode ierr; 2883 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2884 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2885 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2886 PetscInt *masked,nmask,tmp,bs2,ishift; 2887 PetscMPIInt size; 2888 int fd; 2889 PetscScalar *aa; 2890 MPI_Comm comm = ((PetscObject)viewer)->comm; 2891 2892 PetscFunctionBegin; 2893 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2894 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2895 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2896 bs2 = bs*bs; 2897 2898 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2899 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2900 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2901 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2902 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2903 M = header[1]; N = header[2]; nz = header[3]; 2904 2905 if (header[3] < 0) { 2906 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2907 } 2908 2909 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2910 2911 /* 2912 This code adds extra rows to make sure the number of rows is 2913 divisible by the blocksize 2914 */ 2915 mbs = M/bs; 2916 extra_rows = bs - M + bs*(mbs); 2917 if (extra_rows == bs) extra_rows = 0; 2918 else mbs++; 2919 if (extra_rows) { 2920 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2921 } 2922 2923 /* read in row lengths */ 2924 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2925 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2926 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2927 2928 /* read in column indices */ 2929 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2930 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2931 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2932 2933 /* loop over row lengths determining block row lengths */ 2934 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2935 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2936 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2937 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2938 masked = mask + mbs; 2939 rowcount = 0; nzcount = 0; 2940 for (i=0; i<mbs; i++) { 2941 nmask = 0; 2942 for (j=0; j<bs; j++) { 2943 kmax = rowlengths[rowcount]; 2944 for (k=0; k<kmax; k++) { 2945 tmp = jj[nzcount++]/bs; 2946 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2947 } 2948 rowcount++; 2949 } 2950 browlengths[i] += nmask; 2951 /* zero out the mask elements we set */ 2952 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2953 } 2954 2955 /* create our matrix */ 2956 ierr = MatCreate(comm,&B); 2957 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2958 ierr = MatSetType(B,type);CHKERRQ(ierr); 2959 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2960 a = (Mat_SeqBAIJ*)B->data; 2961 2962 /* set matrix "i" values */ 2963 a->i[0] = 0; 2964 for (i=1; i<= mbs; i++) { 2965 a->i[i] = a->i[i-1] + browlengths[i-1]; 2966 a->ilen[i-1] = browlengths[i-1]; 2967 } 2968 a->nz = 0; 2969 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2970 2971 /* read in nonzero values */ 2972 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2973 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2974 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2975 2976 /* set "a" and "j" values into matrix */ 2977 nzcount = 0; jcount = 0; 2978 for (i=0; i<mbs; i++) { 2979 nzcountb = nzcount; 2980 nmask = 0; 2981 for (j=0; j<bs; j++) { 2982 kmax = rowlengths[i*bs+j]; 2983 for (k=0; k<kmax; k++) { 2984 tmp = jj[nzcount++]/bs; 2985 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2986 } 2987 } 2988 /* sort the masked values */ 2989 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2990 2991 /* set "j" values into matrix */ 2992 maskcount = 1; 2993 for (j=0; j<nmask; j++) { 2994 a->j[jcount++] = masked[j]; 2995 mask[masked[j]] = maskcount++; 2996 } 2997 /* set "a" values into matrix */ 2998 ishift = bs2*a->i[i]; 2999 for (j=0; j<bs; j++) { 3000 kmax = rowlengths[i*bs+j]; 3001 for (k=0; k<kmax; k++) { 3002 tmp = jj[nzcountb]/bs ; 3003 block = mask[tmp] - 1; 3004 point = jj[nzcountb] - bs*tmp; 3005 idx = ishift + bs2*block + j + bs*point; 3006 a->a[idx] = (MatScalar)aa[nzcountb++]; 3007 } 3008 } 3009 /* zero out the mask elements we set */ 3010 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3011 } 3012 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3013 3014 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3015 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3016 ierr = PetscFree(aa);CHKERRQ(ierr); 3017 ierr = PetscFree(jj);CHKERRQ(ierr); 3018 ierr = PetscFree(mask);CHKERRQ(ierr); 3019 3020 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3021 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3022 ierr = MatView_Private(B);CHKERRQ(ierr); 3023 3024 *A = B; 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "MatCreateSeqBAIJ" 3030 /*@C 3031 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3032 compressed row) format. For good matrix assembly performance the 3033 user should preallocate the matrix storage by setting the parameter nz 3034 (or the array nnz). By setting these parameters accurately, performance 3035 during matrix assembly can be increased by more than a factor of 50. 3036 3037 Collective on MPI_Comm 3038 3039 Input Parameters: 3040 + comm - MPI communicator, set to PETSC_COMM_SELF 3041 . bs - size of block 3042 . m - number of rows 3043 . n - number of columns 3044 . nz - number of nonzero blocks per block row (same for all rows) 3045 - nnz - array containing the number of nonzero blocks in the various block rows 3046 (possibly different for each block row) or PETSC_NULL 3047 3048 Output Parameter: 3049 . A - the matrix 3050 3051 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3052 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3053 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3054 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3055 3056 Options Database Keys: 3057 . -mat_no_unroll - uses code that does not unroll the loops in the 3058 block calculations (much slower) 3059 . -mat_block_size - size of the blocks to use 3060 3061 Level: intermediate 3062 3063 Notes: 3064 The number of rows and columns must be divisible by blocksize. 3065 3066 If the nnz parameter is given then the nz parameter is ignored 3067 3068 A nonzero block is any block that as 1 or more nonzeros in it 3069 3070 The block AIJ format is fully compatible with standard Fortran 77 3071 storage. That is, the stored row and column indices can begin at 3072 either one (as in Fortran) or zero. See the users' manual for details. 3073 3074 Specify the preallocated storage with either nz or nnz (not both). 3075 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3076 allocation. For additional details, see the users manual chapter on 3077 matrices. 3078 3079 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3080 @*/ 3081 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3082 { 3083 PetscErrorCode ierr; 3084 3085 PetscFunctionBegin; 3086 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3087 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3088 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3089 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3090 PetscFunctionReturn(0); 3091 } 3092 3093 #undef __FUNCT__ 3094 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3095 /*@C 3096 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3097 per row in the matrix. For good matrix assembly performance the 3098 user should preallocate the matrix storage by setting the parameter nz 3099 (or the array nnz). By setting these parameters accurately, performance 3100 during matrix assembly can be increased by more than a factor of 50. 3101 3102 Collective on MPI_Comm 3103 3104 Input Parameters: 3105 + A - the matrix 3106 . bs - size of block 3107 . nz - number of block nonzeros per block row (same for all rows) 3108 - nnz - array containing the number of block nonzeros in the various block rows 3109 (possibly different for each block row) or PETSC_NULL 3110 3111 Options Database Keys: 3112 . -mat_no_unroll - uses code that does not unroll the loops in the 3113 block calculations (much slower) 3114 . -mat_block_size - size of the blocks to use 3115 3116 Level: intermediate 3117 3118 Notes: 3119 If the nnz parameter is given then the nz parameter is ignored 3120 3121 You can call MatGetInfo() to get information on how effective the preallocation was; 3122 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3123 You can also run with the option -info and look for messages with the string 3124 malloc in them to see if additional memory allocation was needed. 3125 3126 The block AIJ format is fully compatible with standard Fortran 77 3127 storage. That is, the stored row and column indices can begin at 3128 either one (as in Fortran) or zero. See the users' manual for details. 3129 3130 Specify the preallocated storage with either nz or nnz (not both). 3131 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3132 allocation. For additional details, see the users manual chapter on 3133 matrices. 3134 3135 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3136 @*/ 3137 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3138 { 3139 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3140 3141 PetscFunctionBegin; 3142 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3143 if (f) { 3144 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3145 } 3146 PetscFunctionReturn(0); 3147 } 3148 3149 #undef __FUNCT__ 3150 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3151 /*@ 3152 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3153 (upper triangular entries in CSR format) provided by the user. 3154 3155 Collective on MPI_Comm 3156 3157 Input Parameters: 3158 + comm - must be an MPI communicator of size 1 3159 . bs - size of block 3160 . m - number of rows 3161 . n - number of columns 3162 . i - row indices 3163 . j - column indices 3164 - a - matrix values 3165 3166 Output Parameter: 3167 . mat - the matrix 3168 3169 Level: intermediate 3170 3171 Notes: 3172 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3173 once the matrix is destroyed 3174 3175 You cannot set new nonzero locations into this matrix, that will generate an error. 3176 3177 The i and j indices are 0 based 3178 3179 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3180 3181 @*/ 3182 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3183 { 3184 PetscErrorCode ierr; 3185 PetscInt ii; 3186 Mat_SeqBAIJ *baij; 3187 3188 PetscFunctionBegin; 3189 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3190 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3191 3192 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3193 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3194 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3195 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3196 baij = (Mat_SeqBAIJ*)(*mat)->data; 3197 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3198 3199 baij->i = i; 3200 baij->j = j; 3201 baij->a = a; 3202 baij->singlemalloc = PETSC_FALSE; 3203 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3204 baij->free_a = PETSC_FALSE; 3205 baij->free_ij = PETSC_FALSE; 3206 3207 for (ii=0; ii<m; ii++) { 3208 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3209 #if defined(PETSC_USE_DEBUG) 3210 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3211 #endif 3212 } 3213 #if defined(PETSC_USE_DEBUG) 3214 for (ii=0; ii<baij->i[m]; ii++) { 3215 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3216 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3217 } 3218 #endif 3219 3220 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3221 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224