1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 default: 129 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 PetscScalar *x,x1,s1; 142 const PetscScalar *b; 143 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 144 PetscErrorCode ierr; 145 PetscInt m = a->mbs,i,i2,nz,idx; 146 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 147 148 PetscFunctionBegin; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 its = its*lits; 151 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 152 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 153 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 154 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 155 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 156 157 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 158 159 diag = a->diag; 160 idiag = a->idiag; 161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 162 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 163 164 if (flag & SOR_ZERO_INITIAL_GUESS) { 165 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 166 x[0] = b[0]*idiag[0]; 167 i2 = 1; 168 idiag += 1; 169 for (i=1; i<m; i++) { 170 v = aa + ai[i]; 171 vi = aj + ai[i]; 172 nz = diag[i] - ai[i]; 173 s1 = b[i2]; 174 while (nz--) { 175 idx = (*vi++); 176 x1 = x[idx]; 177 s1 -= v[0]*x1; 178 v += 1; 179 } 180 x[i2] = idiag[0]*s1; 181 idiag += 1; 182 i2 += 1; 183 } 184 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 185 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 186 } 187 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 188 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 189 i2 = 0; 190 mdiag = a->idiag+a->mbs; 191 for (i=0; i<m; i++) { 192 x1 = x[i2]; 193 x[i2] = mdiag[0]*x1; 194 mdiag += 1; 195 i2 += 1; 196 } 197 ierr = PetscLogFlops(m);CHKERRQ(ierr); 198 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 199 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 200 } 201 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 202 idiag = a->idiag+a->mbs - 1; 203 i2 = m - 1; 204 x1 = x[i2]; 205 x[i2] = idiag[0]*x1; 206 idiag -= 1; 207 i2 -= 1; 208 for (i=m-2; i>=0; i--) { 209 v = aa + (diag[i]+1); 210 vi = aj + diag[i] + 1; 211 nz = ai[i+1] - diag[i] - 1; 212 s1 = x[i2]; 213 while (nz--) { 214 idx = (*vi++); 215 x1 = x[idx]; 216 s1 -= v[0]*x1; 217 v += 1; 218 } 219 x[i2] = idiag[0]*s1; 220 idiag -= 1; 221 i2 -= 1; 222 } 223 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 224 } 225 } else { 226 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 227 } 228 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 229 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 236 { 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238 PetscScalar *x,x1,x2,s1,s2; 239 const PetscScalar *b; 240 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 241 PetscErrorCode ierr; 242 PetscInt m = a->mbs,i,i2,nz,idx; 243 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 244 245 PetscFunctionBegin; 246 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 247 its = its*lits; 248 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 249 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 250 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 251 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 252 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 253 254 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 255 256 diag = a->diag; 257 idiag = a->idiag; 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 260 261 if (flag & SOR_ZERO_INITIAL_GUESS) { 262 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 263 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 264 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 265 i2 = 2; 266 idiag += 4; 267 for (i=1; i<m; i++) { 268 v = aa + 4*ai[i]; 269 vi = aj + ai[i]; 270 nz = diag[i] - ai[i]; 271 s1 = b[i2]; s2 = b[i2+1]; 272 while (nz--) { 273 idx = 2*(*vi++); 274 x1 = x[idx]; x2 = x[1+idx]; 275 s1 -= v[0]*x1 + v[2]*x2; 276 s2 -= v[1]*x1 + v[3]*x2; 277 v += 4; 278 } 279 x[i2] = idiag[0]*s1 + idiag[2]*s2; 280 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 281 idiag += 4; 282 i2 += 2; 283 } 284 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 285 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 286 } 287 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 288 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 289 i2 = 0; 290 mdiag = a->idiag+4*a->mbs; 291 for (i=0; i<m; i++) { 292 x1 = x[i2]; x2 = x[i2+1]; 293 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 294 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 295 mdiag += 4; 296 i2 += 2; 297 } 298 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 299 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 300 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 301 } 302 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 303 idiag = a->idiag+4*a->mbs - 4; 304 i2 = 2*m - 2; 305 x1 = x[i2]; x2 = x[i2+1]; 306 x[i2] = idiag[0]*x1 + idiag[2]*x2; 307 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 308 idiag -= 4; 309 i2 -= 2; 310 for (i=m-2; i>=0; i--) { 311 v = aa + 4*(diag[i]+1); 312 vi = aj + diag[i] + 1; 313 nz = ai[i+1] - diag[i] - 1; 314 s1 = x[i2]; s2 = x[i2+1]; 315 while (nz--) { 316 idx = 2*(*vi++); 317 x1 = x[idx]; x2 = x[1+idx]; 318 s1 -= v[0]*x1 + v[2]*x2; 319 s2 -= v[1]*x1 + v[3]*x2; 320 v += 4; 321 } 322 x[i2] = idiag[0]*s1 + idiag[2]*s2; 323 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 324 idiag -= 4; 325 i2 -= 2; 326 } 327 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 328 } 329 } else { 330 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 331 } 332 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 333 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 340 { 341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 342 PetscScalar *x,x1,x2,x3,s1,s2,s3; 343 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 344 const PetscScalar *b; 345 PetscErrorCode ierr; 346 PetscInt m = a->mbs,i,i2,nz,idx; 347 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 348 349 PetscFunctionBegin; 350 its = its*lits; 351 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 352 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 353 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 354 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 355 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 356 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 357 358 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 359 360 diag = a->diag; 361 idiag = a->idiag; 362 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 363 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 364 365 if (flag & SOR_ZERO_INITIAL_GUESS) { 366 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 367 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 368 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 369 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 370 i2 = 3; 371 idiag += 9; 372 for (i=1; i<m; i++) { 373 v = aa + 9*ai[i]; 374 vi = aj + ai[i]; 375 nz = diag[i] - ai[i]; 376 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 377 while (nz--) { 378 idx = 3*(*vi++); 379 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 380 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 381 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 382 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 383 v += 9; 384 } 385 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 386 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 387 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 388 idiag += 9; 389 i2 += 3; 390 } 391 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 392 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 393 } 394 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 395 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 396 i2 = 0; 397 mdiag = a->idiag+9*a->mbs; 398 for (i=0; i<m; i++) { 399 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 400 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 401 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 402 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 403 mdiag += 9; 404 i2 += 3; 405 } 406 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 407 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 408 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 409 } 410 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 411 idiag = a->idiag+9*a->mbs - 9; 412 i2 = 3*m - 3; 413 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 414 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 415 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 416 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 417 idiag -= 9; 418 i2 -= 3; 419 for (i=m-2; i>=0; i--) { 420 v = aa + 9*(diag[i]+1); 421 vi = aj + diag[i] + 1; 422 nz = ai[i+1] - diag[i] - 1; 423 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 424 while (nz--) { 425 idx = 3*(*vi++); 426 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 427 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 428 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 429 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 430 v += 9; 431 } 432 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 433 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 434 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 435 idiag -= 9; 436 i2 -= 3; 437 } 438 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 439 } 440 } else { 441 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 442 } 443 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 451 { 452 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 453 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 454 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 455 const PetscScalar *b; 456 PetscErrorCode ierr; 457 PetscInt m = a->mbs,i,i2,nz,idx; 458 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 459 460 PetscFunctionBegin; 461 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 462 its = its*lits; 463 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 464 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 465 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 466 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 467 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 468 469 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 470 471 diag = a->diag; 472 idiag = a->idiag; 473 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 474 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 475 476 if (flag & SOR_ZERO_INITIAL_GUESS) { 477 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 478 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 479 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 480 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 481 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 482 i2 = 4; 483 idiag += 16; 484 for (i=1; i<m; i++) { 485 v = aa + 16*ai[i]; 486 vi = aj + ai[i]; 487 nz = diag[i] - ai[i]; 488 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 489 while (nz--) { 490 idx = 4*(*vi++); 491 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 492 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 493 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 494 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 495 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 496 v += 16; 497 } 498 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 499 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 500 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 501 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 502 idiag += 16; 503 i2 += 4; 504 } 505 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 507 } 508 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510 i2 = 0; 511 mdiag = a->idiag+16*a->mbs; 512 for (i=0; i<m; i++) { 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 515 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 516 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 517 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 518 mdiag += 16; 519 i2 += 4; 520 } 521 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 522 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 523 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 524 } 525 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 526 idiag = a->idiag+16*a->mbs - 16; 527 i2 = 4*m - 4; 528 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 529 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 530 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 531 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 532 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 533 idiag -= 16; 534 i2 -= 4; 535 for (i=m-2; i>=0; i--) { 536 v = aa + 16*(diag[i]+1); 537 vi = aj + diag[i] + 1; 538 nz = ai[i+1] - diag[i] - 1; 539 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 540 while (nz--) { 541 idx = 4*(*vi++); 542 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 543 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 544 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 545 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 546 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 547 v += 16; 548 } 549 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 550 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 551 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 552 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 553 idiag -= 16; 554 i2 -= 4; 555 } 556 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 569 { 570 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 571 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 572 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 573 const PetscScalar *b; 574 PetscErrorCode ierr; 575 PetscInt m = a->mbs,i,i2,nz,idx; 576 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 577 578 PetscFunctionBegin; 579 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 580 its = its*lits; 581 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 582 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 583 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 584 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 585 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 586 587 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 588 589 diag = a->diag; 590 idiag = a->idiag; 591 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 592 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 593 594 if (flag & SOR_ZERO_INITIAL_GUESS) { 595 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 596 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 597 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 598 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 599 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 600 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 601 i2 = 5; 602 idiag += 25; 603 for (i=1; i<m; i++) { 604 v = aa + 25*ai[i]; 605 vi = aj + ai[i]; 606 nz = diag[i] - ai[i]; 607 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 608 while (nz--) { 609 idx = 5*(*vi++); 610 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 611 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 612 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 613 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 614 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 615 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 616 v += 25; 617 } 618 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 619 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 620 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 621 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 622 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 623 idiag += 25; 624 i2 += 5; 625 } 626 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 627 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 628 } 629 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 630 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 631 i2 = 0; 632 mdiag = a->idiag+25*a->mbs; 633 for (i=0; i<m; i++) { 634 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 635 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 636 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 637 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 638 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 639 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 640 mdiag += 25; 641 i2 += 5; 642 } 643 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 644 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 645 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 646 } 647 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 648 idiag = a->idiag+25*a->mbs - 25; 649 i2 = 5*m - 5; 650 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 651 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 652 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 653 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 654 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 655 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 656 idiag -= 25; 657 i2 -= 5; 658 for (i=m-2; i>=0; i--) { 659 v = aa + 25*(diag[i]+1); 660 vi = aj + diag[i] + 1; 661 nz = ai[i+1] - diag[i] - 1; 662 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 663 while (nz--) { 664 idx = 5*(*vi++); 665 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 666 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 667 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 668 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 669 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 670 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 671 v += 25; 672 } 673 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 674 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 675 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 676 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 677 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 678 idiag -= 25; 679 i2 -= 5; 680 } 681 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 682 } 683 } else { 684 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 685 } 686 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 694 { 695 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 696 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 697 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 698 const PetscScalar *b; 699 PetscErrorCode ierr; 700 PetscInt m = a->mbs,i,i2,nz,idx; 701 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 702 703 PetscFunctionBegin; 704 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 705 its = its*lits; 706 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 707 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 708 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 709 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 710 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 711 712 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 713 714 diag = a->diag; 715 idiag = a->idiag; 716 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 717 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 718 719 if (flag & SOR_ZERO_INITIAL_GUESS) { 720 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 721 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 722 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 723 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 724 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 725 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 726 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 727 i2 = 6; 728 idiag += 36; 729 for (i=1; i<m; i++) { 730 v = aa + 36*ai[i]; 731 vi = aj + ai[i]; 732 nz = diag[i] - ai[i]; 733 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 734 while (nz--) { 735 idx = 6*(*vi++); 736 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 737 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 738 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 739 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 740 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 741 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 742 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 743 v += 36; 744 } 745 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 746 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 747 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 748 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 749 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 750 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 751 idiag += 36; 752 i2 += 6; 753 } 754 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 755 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 756 } 757 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 758 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 759 i2 = 0; 760 mdiag = a->idiag+36*a->mbs; 761 for (i=0; i<m; i++) { 762 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 763 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 764 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 765 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 766 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 767 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 768 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 769 mdiag += 36; 770 i2 += 6; 771 } 772 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 773 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 774 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 775 } 776 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 777 idiag = a->idiag+36*a->mbs - 36; 778 i2 = 6*m - 6; 779 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 780 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 781 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 782 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 783 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 784 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 785 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 786 idiag -= 36; 787 i2 -= 6; 788 for (i=m-2; i>=0; i--) { 789 v = aa + 36*(diag[i]+1); 790 vi = aj + diag[i] + 1; 791 nz = ai[i+1] - diag[i] - 1; 792 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 793 while (nz--) { 794 idx = 6*(*vi++); 795 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 796 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 797 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 798 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 799 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 800 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 801 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 802 v += 36; 803 } 804 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 805 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 806 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 807 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 808 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 809 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 810 idiag -= 36; 811 i2 -= 6; 812 } 813 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 814 } 815 } else { 816 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 817 } 818 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 819 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 826 { 827 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 828 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 829 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 830 const PetscScalar *b; 831 PetscErrorCode ierr; 832 PetscInt m = a->mbs,i,i2,nz,idx; 833 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 834 835 PetscFunctionBegin; 836 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 837 its = its*lits; 838 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 839 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 840 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 841 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 842 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 843 844 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 845 846 diag = a->diag; 847 idiag = a->idiag; 848 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 849 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 850 851 if (flag & SOR_ZERO_INITIAL_GUESS) { 852 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 853 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 854 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 855 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 856 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 857 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 858 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 859 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 860 i2 = 7; 861 idiag += 49; 862 for (i=1; i<m; i++) { 863 v = aa + 49*ai[i]; 864 vi = aj + ai[i]; 865 nz = diag[i] - ai[i]; 866 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 867 while (nz--) { 868 idx = 7*(*vi++); 869 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 870 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 871 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 872 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 873 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 874 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 875 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 876 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 877 v += 49; 878 } 879 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 880 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 881 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 882 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 883 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 884 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 885 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 886 idiag += 49; 887 i2 += 7; 888 } 889 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 890 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 891 } 892 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 893 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 894 i2 = 0; 895 mdiag = a->idiag+49*a->mbs; 896 for (i=0; i<m; i++) { 897 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 898 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 899 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 900 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 901 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 902 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 903 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 904 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 905 mdiag += 36; 906 i2 += 6; 907 } 908 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 909 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 910 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 911 } 912 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 913 idiag = a->idiag+49*a->mbs - 49; 914 i2 = 7*m - 7; 915 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 916 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 917 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 918 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 919 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 920 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 921 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 922 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 923 idiag -= 49; 924 i2 -= 7; 925 for (i=m-2; i>=0; i--) { 926 v = aa + 49*(diag[i]+1); 927 vi = aj + diag[i] + 1; 928 nz = ai[i+1] - diag[i] - 1; 929 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 930 while (nz--) { 931 idx = 7*(*vi++); 932 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 933 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 934 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 935 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 936 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 937 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 938 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 939 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 940 v += 49; 941 } 942 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 943 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 944 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 945 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 946 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 947 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 948 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 949 idiag -= 49; 950 i2 -= 7; 951 } 952 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 953 } 954 } else { 955 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 956 } 957 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 958 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 /* 963 Special version for direct calls from Fortran (Used in PETSc-fun3d) 964 */ 965 #if defined(PETSC_HAVE_FORTRAN_CAPS) 966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 968 #define matsetvaluesblocked4_ matsetvaluesblocked4 969 #endif 970 971 EXTERN_C_BEGIN 972 #undef __FUNCT__ 973 #define __FUNCT__ "matsetvaluesblocked4_" 974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 975 { 976 Mat A = *AA; 977 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 978 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 979 PetscInt *ai=a->i,*ailen=a->ilen; 980 PetscInt *aj=a->j,stepval,lastcol = -1; 981 const PetscScalar *value = v; 982 MatScalar *ap,*aa = a->a,*bap; 983 984 PetscFunctionBegin; 985 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 986 stepval = (n-1)*4; 987 for (k=0; k<m; k++) { /* loop over added rows */ 988 row = im[k]; 989 rp = aj + ai[row]; 990 ap = aa + 16*ai[row]; 991 nrow = ailen[row]; 992 low = 0; 993 high = nrow; 994 for (l=0; l<n; l++) { /* loop over added columns */ 995 col = in[l]; 996 if (col <= lastcol) low = 0; else high = nrow; 997 lastcol = col; 998 value = v + k*(stepval+4 + l)*4; 999 while (high-low > 7) { 1000 t = (low+high)/2; 1001 if (rp[t] > col) high = t; 1002 else low = t; 1003 } 1004 for (i=low; i<high; i++) { 1005 if (rp[i] > col) break; 1006 if (rp[i] == col) { 1007 bap = ap + 16*i; 1008 for (ii=0; ii<4; ii++,value+=stepval) { 1009 for (jj=ii; jj<16; jj+=4) { 1010 bap[jj] += *value++; 1011 } 1012 } 1013 goto noinsert2; 1014 } 1015 } 1016 N = nrow++ - 1; 1017 high++; /* added new column index thus must search to one higher than before */ 1018 /* shift up all the later entries in this row */ 1019 for (ii=N; ii>=i; ii--) { 1020 rp[ii+1] = rp[ii]; 1021 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1022 } 1023 if (N >= i) { 1024 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1025 } 1026 rp[i] = col; 1027 bap = ap + 16*i; 1028 for (ii=0; ii<4; ii++,value+=stepval) { 1029 for (jj=ii; jj<16; jj+=4) { 1030 bap[jj] = *value++; 1031 } 1032 } 1033 noinsert2:; 1034 low = i; 1035 } 1036 ailen[row] = nrow; 1037 } 1038 PetscFunctionReturnVoid(); 1039 } 1040 EXTERN_C_END 1041 1042 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1043 #define matsetvalues4_ MATSETVALUES4 1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1045 #define matsetvalues4_ matsetvalues4 1046 #endif 1047 1048 EXTERN_C_BEGIN 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatSetValues4_" 1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1052 { 1053 Mat A = *AA; 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1056 PetscInt *ai=a->i,*ailen=a->ilen; 1057 PetscInt *aj=a->j,brow,bcol; 1058 PetscInt ridx,cidx,lastcol = -1; 1059 MatScalar *ap,value,*aa=a->a,*bap; 1060 1061 PetscFunctionBegin; 1062 for (k=0; k<m; k++) { /* loop over added rows */ 1063 row = im[k]; brow = row/4; 1064 rp = aj + ai[brow]; 1065 ap = aa + 16*ai[brow]; 1066 nrow = ailen[brow]; 1067 low = 0; 1068 high = nrow; 1069 for (l=0; l<n; l++) { /* loop over added columns */ 1070 col = in[l]; bcol = col/4; 1071 ridx = row % 4; cidx = col % 4; 1072 value = v[l + k*n]; 1073 if (col <= lastcol) low = 0; else high = nrow; 1074 lastcol = col; 1075 while (high-low > 7) { 1076 t = (low+high)/2; 1077 if (rp[t] > bcol) high = t; 1078 else low = t; 1079 } 1080 for (i=low; i<high; i++) { 1081 if (rp[i] > bcol) break; 1082 if (rp[i] == bcol) { 1083 bap = ap + 16*i + 4*cidx + ridx; 1084 *bap += value; 1085 goto noinsert1; 1086 } 1087 } 1088 N = nrow++ - 1; 1089 high++; /* added new column thus must search to one higher than before */ 1090 /* shift up all the later entries in this row */ 1091 for (ii=N; ii>=i; ii--) { 1092 rp[ii+1] = rp[ii]; 1093 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1094 } 1095 if (N>=i) { 1096 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1097 } 1098 rp[i] = bcol; 1099 ap[16*i + 4*cidx + ridx] = value; 1100 noinsert1:; 1101 low = i; 1102 } 1103 ailen[brow] = nrow; 1104 } 1105 PetscFunctionReturnVoid(); 1106 } 1107 EXTERN_C_END 1108 1109 /* 1110 Checks for missing diagonals 1111 */ 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1117 PetscErrorCode ierr; 1118 PetscInt *diag,*jj = a->j,i; 1119 1120 PetscFunctionBegin; 1121 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1122 *missing = PETSC_FALSE; 1123 if (A->rmap->n > 0 && !jj) { 1124 *missing = PETSC_TRUE; 1125 if (d) *d = 0; 1126 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1127 } else { 1128 diag = a->diag; 1129 for (i=0; i<a->mbs; i++) { 1130 if (jj[diag[i]] != i) { 1131 *missing = PETSC_TRUE; 1132 if (d) *d = i; 1133 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1134 } 1135 } 1136 } 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1143 { 1144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1145 PetscErrorCode ierr; 1146 PetscInt i,j,m = a->mbs; 1147 1148 PetscFunctionBegin; 1149 if (!a->diag) { 1150 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1151 } 1152 for (i=0; i<m; i++) { 1153 a->diag[i] = a->i[i+1]; 1154 for (j=a->i[i]; j<a->i[i+1]; j++) { 1155 if (a->j[j] == i) { 1156 a->diag[i] = j; 1157 break; 1158 } 1159 } 1160 } 1161 PetscFunctionReturn(0); 1162 } 1163 1164 1165 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1169 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1170 { 1171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1172 PetscErrorCode ierr; 1173 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1174 PetscInt *tia, *tja; 1175 1176 PetscFunctionBegin; 1177 *nn = n; 1178 if (!ia) PetscFunctionReturn(0); 1179 if (symmetric) { 1180 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1181 } else { 1182 tia = a->i; tja = a->j; 1183 } 1184 1185 if (!blockcompressed && bs > 1) { 1186 (*nn) *= bs; 1187 nbs = bs; 1188 /* malloc & create the natural set of indices */ 1189 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1190 if (n) { 1191 (*ia)[0] = 0; 1192 for (j=1; j<bs; j++) { 1193 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1194 } 1195 } 1196 1197 for (i=1; i<n; i++) { 1198 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1199 for (j=1; j<bs; j++) { 1200 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1201 } 1202 } 1203 if (n) { 1204 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1205 } 1206 1207 if (ja) { 1208 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1209 cnt = 0; 1210 for (i=0; i<n; i++) { 1211 for (j=0; j<bs; j++) { 1212 for (k=tia[i]; k<tia[i+1]; k++) { 1213 for (l=0; l<bs; l++) { 1214 (*ja)[cnt++] = bs*tja[k] + l; 1215 } 1216 } 1217 } 1218 } 1219 } 1220 1221 n *= bs; 1222 nz *= bs*bs; 1223 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1224 ierr = PetscFree(tia);CHKERRQ(ierr); 1225 ierr = PetscFree(tja);CHKERRQ(ierr); 1226 } 1227 } else if (oshift == 1) { 1228 if (symmetric) { 1229 PetscInt nz = tia[A->rmap->n/bs]; 1230 /* add 1 to i and j indices */ 1231 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1232 *ia = tia; 1233 if (ja) { 1234 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1235 *ja = tja; 1236 } 1237 } else { 1238 PetscInt nz = a->i[A->rmap->n/bs]; 1239 /* malloc space and add 1 to i and j indices */ 1240 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1241 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1242 if (ja) { 1243 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1244 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1245 } 1246 } 1247 } else { 1248 *ia = tia; 1249 if (ja) *ja = tja; 1250 } 1251 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1257 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1258 { 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 if (!ia) PetscFunctionReturn(0); 1263 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1264 ierr = PetscFree(*ia);CHKERRQ(ierr); 1265 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1272 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1273 { 1274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1275 PetscErrorCode ierr; 1276 1277 PetscFunctionBegin; 1278 #if defined(PETSC_USE_LOG) 1279 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1280 #endif 1281 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1282 if (a->row) { 1283 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1284 } 1285 if (a->col) { 1286 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1287 } 1288 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1289 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1290 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1291 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1292 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1293 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1294 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1295 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1296 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1297 1298 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1299 ierr = PetscFree(a);CHKERRQ(ierr); 1300 1301 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1315 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1316 { 1317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 switch (op) { 1322 case MAT_ROW_ORIENTED: 1323 a->roworiented = flg; 1324 break; 1325 case MAT_KEEP_NONZERO_PATTERN: 1326 a->keepnonzeropattern = flg; 1327 break; 1328 case MAT_NEW_NONZERO_LOCATIONS: 1329 a->nonew = (flg ? 0 : 1); 1330 break; 1331 case MAT_NEW_NONZERO_LOCATION_ERR: 1332 a->nonew = (flg ? -1 : 0); 1333 break; 1334 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1335 a->nonew = (flg ? -2 : 0); 1336 break; 1337 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1338 a->nounused = (flg ? -1 : 0); 1339 break; 1340 case MAT_NEW_DIAGONALS: 1341 case MAT_IGNORE_OFF_PROC_ENTRIES: 1342 case MAT_USE_HASH_TABLE: 1343 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1344 break; 1345 case MAT_SYMMETRIC: 1346 case MAT_STRUCTURALLY_SYMMETRIC: 1347 case MAT_HERMITIAN: 1348 case MAT_SYMMETRY_ETERNAL: 1349 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1350 break; 1351 default: 1352 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1359 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1360 { 1361 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1362 PetscErrorCode ierr; 1363 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1364 MatScalar *aa,*aa_i; 1365 PetscScalar *v_i; 1366 1367 PetscFunctionBegin; 1368 bs = A->rmap->bs; 1369 ai = a->i; 1370 aj = a->j; 1371 aa = a->a; 1372 bs2 = a->bs2; 1373 1374 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1375 1376 bn = row/bs; /* Block number */ 1377 bp = row % bs; /* Block Position */ 1378 M = ai[bn+1] - ai[bn]; 1379 *nz = bs*M; 1380 1381 if (v) { 1382 *v = 0; 1383 if (*nz) { 1384 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1385 for (i=0; i<M; i++) { /* for each block in the block row */ 1386 v_i = *v + i*bs; 1387 aa_i = aa + bs2*(ai[bn] + i); 1388 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1389 } 1390 } 1391 } 1392 1393 if (idx) { 1394 *idx = 0; 1395 if (*nz) { 1396 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1397 for (i=0; i<M; i++) { /* for each block in the block row */ 1398 idx_i = *idx + i*bs; 1399 itmp = bs*aj[ai[bn] + i]; 1400 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1401 } 1402 } 1403 } 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1409 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1410 { 1411 PetscErrorCode ierr; 1412 1413 PetscFunctionBegin; 1414 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1415 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1421 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1422 { 1423 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1424 Mat C; 1425 PetscErrorCode ierr; 1426 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1427 PetscInt *rows,*cols,bs2=a->bs2; 1428 MatScalar *array; 1429 1430 PetscFunctionBegin; 1431 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1432 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1433 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1434 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1435 1436 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1437 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1438 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1439 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1440 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1441 ierr = PetscFree(col);CHKERRQ(ierr); 1442 } else { 1443 C = *B; 1444 } 1445 1446 array = a->a; 1447 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1448 cols = rows + bs; 1449 for (i=0; i<mbs; i++) { 1450 cols[0] = i*bs; 1451 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1452 len = ai[i+1] - ai[i]; 1453 for (j=0; j<len; j++) { 1454 rows[0] = (*aj++)*bs; 1455 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1456 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1457 array += bs2; 1458 } 1459 } 1460 ierr = PetscFree(rows);CHKERRQ(ierr); 1461 1462 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1464 1465 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1466 *B = C; 1467 } else { 1468 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1475 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1476 { 1477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1478 PetscErrorCode ierr; 1479 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1480 int fd; 1481 PetscScalar *aa; 1482 FILE *file; 1483 1484 PetscFunctionBegin; 1485 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1486 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1487 col_lens[0] = MAT_FILE_COOKIE; 1488 1489 col_lens[1] = A->rmap->N; 1490 col_lens[2] = A->cmap->n; 1491 col_lens[3] = a->nz*bs2; 1492 1493 /* store lengths of each row and write (including header) to file */ 1494 count = 0; 1495 for (i=0; i<a->mbs; i++) { 1496 for (j=0; j<bs; j++) { 1497 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1498 } 1499 } 1500 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1501 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1502 1503 /* store column indices (zero start index) */ 1504 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1505 count = 0; 1506 for (i=0; i<a->mbs; i++) { 1507 for (j=0; j<bs; j++) { 1508 for (k=a->i[i]; k<a->i[i+1]; k++) { 1509 for (l=0; l<bs; l++) { 1510 jj[count++] = bs*a->j[k] + l; 1511 } 1512 } 1513 } 1514 } 1515 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1516 ierr = PetscFree(jj);CHKERRQ(ierr); 1517 1518 /* store nonzero values */ 1519 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1520 count = 0; 1521 for (i=0; i<a->mbs; i++) { 1522 for (j=0; j<bs; j++) { 1523 for (k=a->i[i]; k<a->i[i+1]; k++) { 1524 for (l=0; l<bs; l++) { 1525 aa[count++] = a->a[bs2*k + l*bs + j]; 1526 } 1527 } 1528 } 1529 } 1530 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1531 ierr = PetscFree(aa);CHKERRQ(ierr); 1532 1533 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1534 if (file) { 1535 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1542 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1543 { 1544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1545 PetscErrorCode ierr; 1546 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1547 PetscViewerFormat format; 1548 1549 PetscFunctionBegin; 1550 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1551 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1552 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1553 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1554 Mat aij; 1555 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1556 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1557 ierr = MatDestroy(aij);CHKERRQ(ierr); 1558 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1559 PetscFunctionReturn(0); 1560 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1561 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1562 for (i=0; i<a->mbs; i++) { 1563 for (j=0; j<bs; j++) { 1564 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1565 for (k=a->i[i]; k<a->i[i+1]; k++) { 1566 for (l=0; l<bs; l++) { 1567 #if defined(PETSC_USE_COMPLEX) 1568 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1570 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1573 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1575 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1576 } 1577 #else 1578 if (a->a[bs2*k + l*bs + j] != 0.0) { 1579 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1580 } 1581 #endif 1582 } 1583 } 1584 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1585 } 1586 } 1587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1588 } else { 1589 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1590 for (i=0; i<a->mbs; i++) { 1591 for (j=0; j<bs; j++) { 1592 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1593 for (k=a->i[i]; k<a->i[i+1]; k++) { 1594 for (l=0; l<bs; l++) { 1595 #if defined(PETSC_USE_COMPLEX) 1596 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1597 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1598 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1599 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1600 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1601 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1602 } else { 1603 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1604 } 1605 #else 1606 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1607 #endif 1608 } 1609 } 1610 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1611 } 1612 } 1613 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1614 } 1615 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #undef __FUNCT__ 1620 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1621 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1622 { 1623 Mat A = (Mat) Aa; 1624 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1625 PetscErrorCode ierr; 1626 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1627 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1628 MatScalar *aa; 1629 PetscViewer viewer; 1630 1631 PetscFunctionBegin; 1632 1633 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1634 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1635 1636 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1637 1638 /* loop over matrix elements drawing boxes */ 1639 color = PETSC_DRAW_BLUE; 1640 for (i=0,row=0; i<mbs; i++,row+=bs) { 1641 for (j=a->i[i]; j<a->i[i+1]; j++) { 1642 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1643 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1644 aa = a->a + j*bs2; 1645 for (k=0; k<bs; k++) { 1646 for (l=0; l<bs; l++) { 1647 if (PetscRealPart(*aa++) >= 0.) continue; 1648 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1649 } 1650 } 1651 } 1652 } 1653 color = PETSC_DRAW_CYAN; 1654 for (i=0,row=0; i<mbs; i++,row+=bs) { 1655 for (j=a->i[i]; j<a->i[i+1]; j++) { 1656 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1657 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1658 aa = a->a + j*bs2; 1659 for (k=0; k<bs; k++) { 1660 for (l=0; l<bs; l++) { 1661 if (PetscRealPart(*aa++) != 0.) continue; 1662 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1663 } 1664 } 1665 } 1666 } 1667 1668 color = PETSC_DRAW_RED; 1669 for (i=0,row=0; i<mbs; i++,row+=bs) { 1670 for (j=a->i[i]; j<a->i[i+1]; j++) { 1671 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1672 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1673 aa = a->a + j*bs2; 1674 for (k=0; k<bs; k++) { 1675 for (l=0; l<bs; l++) { 1676 if (PetscRealPart(*aa++) <= 0.) continue; 1677 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1678 } 1679 } 1680 } 1681 } 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1687 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1688 { 1689 PetscErrorCode ierr; 1690 PetscReal xl,yl,xr,yr,w,h; 1691 PetscDraw draw; 1692 PetscTruth isnull; 1693 1694 PetscFunctionBegin; 1695 1696 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1697 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1698 1699 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1700 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1701 xr += w; yr += h; xl = -w; yl = -h; 1702 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1703 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatView_SeqBAIJ" 1710 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1711 { 1712 PetscErrorCode ierr; 1713 PetscTruth iascii,isbinary,isdraw; 1714 1715 PetscFunctionBegin; 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1717 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1718 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1719 if (iascii){ 1720 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1721 } else if (isbinary) { 1722 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1723 } else if (isdraw) { 1724 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1725 } else { 1726 Mat B; 1727 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1728 ierr = MatView(B,viewer);CHKERRQ(ierr); 1729 ierr = MatDestroy(B);CHKERRQ(ierr); 1730 } 1731 PetscFunctionReturn(0); 1732 } 1733 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1737 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1738 { 1739 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1740 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1741 PetscInt *ai = a->i,*ailen = a->ilen; 1742 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1743 MatScalar *ap,*aa = a->a; 1744 1745 PetscFunctionBegin; 1746 for (k=0; k<m; k++) { /* loop over rows */ 1747 row = im[k]; brow = row/bs; 1748 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1749 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1750 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1751 nrow = ailen[brow]; 1752 for (l=0; l<n; l++) { /* loop over columns */ 1753 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1754 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1755 col = in[l] ; 1756 bcol = col/bs; 1757 cidx = col%bs; 1758 ridx = row%bs; 1759 high = nrow; 1760 low = 0; /* assume unsorted */ 1761 while (high-low > 5) { 1762 t = (low+high)/2; 1763 if (rp[t] > bcol) high = t; 1764 else low = t; 1765 } 1766 for (i=low; i<high; i++) { 1767 if (rp[i] > bcol) break; 1768 if (rp[i] == bcol) { 1769 *v++ = ap[bs2*i+bs*cidx+ridx]; 1770 goto finished; 1771 } 1772 } 1773 *v++ = 0.0; 1774 finished:; 1775 } 1776 } 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #define CHUNKSIZE 10 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1783 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1784 { 1785 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1786 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1787 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1788 PetscErrorCode ierr; 1789 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1790 PetscTruth roworiented=a->roworiented; 1791 const PetscScalar *value = v; 1792 MatScalar *ap,*aa = a->a,*bap; 1793 1794 PetscFunctionBegin; 1795 if (roworiented) { 1796 stepval = (n-1)*bs; 1797 } else { 1798 stepval = (m-1)*bs; 1799 } 1800 for (k=0; k<m; k++) { /* loop over added rows */ 1801 row = im[k]; 1802 if (row < 0) continue; 1803 #if defined(PETSC_USE_DEBUG) 1804 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1805 #endif 1806 rp = aj + ai[row]; 1807 ap = aa + bs2*ai[row]; 1808 rmax = imax[row]; 1809 nrow = ailen[row]; 1810 low = 0; 1811 high = nrow; 1812 for (l=0; l<n; l++) { /* loop over added columns */ 1813 if (in[l] < 0) continue; 1814 #if defined(PETSC_USE_DEBUG) 1815 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1816 #endif 1817 col = in[l]; 1818 if (roworiented) { 1819 value = v + k*(stepval+bs)*bs + l*bs; 1820 } else { 1821 value = v + l*(stepval+bs)*bs + k*bs; 1822 } 1823 if (col <= lastcol) low = 0; else high = nrow; 1824 lastcol = col; 1825 while (high-low > 7) { 1826 t = (low+high)/2; 1827 if (rp[t] > col) high = t; 1828 else low = t; 1829 } 1830 for (i=low; i<high; i++) { 1831 if (rp[i] > col) break; 1832 if (rp[i] == col) { 1833 bap = ap + bs2*i; 1834 if (roworiented) { 1835 if (is == ADD_VALUES) { 1836 for (ii=0; ii<bs; ii++,value+=stepval) { 1837 for (jj=ii; jj<bs2; jj+=bs) { 1838 bap[jj] += *value++; 1839 } 1840 } 1841 } else { 1842 for (ii=0; ii<bs; ii++,value+=stepval) { 1843 for (jj=ii; jj<bs2; jj+=bs) { 1844 bap[jj] = *value++; 1845 } 1846 } 1847 } 1848 } else { 1849 if (is == ADD_VALUES) { 1850 for (ii=0; ii<bs; ii++,value+=stepval) { 1851 for (jj=0; jj<bs; jj++) { 1852 *bap++ += *value++; 1853 } 1854 } 1855 } else { 1856 for (ii=0; ii<bs; ii++,value+=stepval) { 1857 for (jj=0; jj<bs; jj++) { 1858 *bap++ = *value++; 1859 } 1860 } 1861 } 1862 } 1863 goto noinsert2; 1864 } 1865 } 1866 if (nonew == 1) goto noinsert2; 1867 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1868 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1869 N = nrow++ - 1; high++; 1870 /* shift up all the later entries in this row */ 1871 for (ii=N; ii>=i; ii--) { 1872 rp[ii+1] = rp[ii]; 1873 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1874 } 1875 if (N >= i) { 1876 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1877 } 1878 rp[i] = col; 1879 bap = ap + bs2*i; 1880 if (roworiented) { 1881 for (ii=0; ii<bs; ii++,value+=stepval) { 1882 for (jj=ii; jj<bs2; jj+=bs) { 1883 bap[jj] = *value++; 1884 } 1885 } 1886 } else { 1887 for (ii=0; ii<bs; ii++,value+=stepval) { 1888 for (jj=0; jj<bs; jj++) { 1889 *bap++ = *value++; 1890 } 1891 } 1892 } 1893 noinsert2:; 1894 low = i; 1895 } 1896 ailen[row] = nrow; 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1903 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1904 { 1905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1906 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1907 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1908 PetscErrorCode ierr; 1909 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1910 MatScalar *aa = a->a,*ap; 1911 PetscReal ratio=0.6; 1912 1913 PetscFunctionBegin; 1914 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1915 1916 if (m) rmax = ailen[0]; 1917 for (i=1; i<mbs; i++) { 1918 /* move each row back by the amount of empty slots (fshift) before it*/ 1919 fshift += imax[i-1] - ailen[i-1]; 1920 rmax = PetscMax(rmax,ailen[i]); 1921 if (fshift) { 1922 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1923 N = ailen[i]; 1924 for (j=0; j<N; j++) { 1925 ip[j-fshift] = ip[j]; 1926 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1927 } 1928 } 1929 ai[i] = ai[i-1] + ailen[i-1]; 1930 } 1931 if (mbs) { 1932 fshift += imax[mbs-1] - ailen[mbs-1]; 1933 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1934 } 1935 /* reset ilen and imax for each row */ 1936 for (i=0; i<mbs; i++) { 1937 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1938 } 1939 a->nz = ai[mbs]; 1940 1941 /* diagonals may have moved, so kill the diagonal pointers */ 1942 a->idiagvalid = PETSC_FALSE; 1943 if (fshift && a->diag) { 1944 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1945 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1946 a->diag = 0; 1947 } 1948 if (fshift && a->nounused == -1) { 1949 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1950 } 1951 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1953 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1954 a->reallocs = 0; 1955 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1956 1957 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1958 if (a->compressedrow.use){ 1959 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1960 } 1961 1962 A->same_nonzero = PETSC_TRUE; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 /* 1967 This function returns an array of flags which indicate the locations of contiguous 1968 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1969 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1970 Assume: sizes should be long enough to hold all the values. 1971 */ 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1974 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1975 { 1976 PetscInt i,j,k,row; 1977 PetscTruth flg; 1978 1979 PetscFunctionBegin; 1980 for (i=0,j=0; i<n; j++) { 1981 row = idx[i]; 1982 if (row%bs!=0) { /* Not the begining of a block */ 1983 sizes[j] = 1; 1984 i++; 1985 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1986 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1987 i++; 1988 } else { /* Begining of the block, so check if the complete block exists */ 1989 flg = PETSC_TRUE; 1990 for (k=1; k<bs; k++) { 1991 if (row+k != idx[i+k]) { /* break in the block */ 1992 flg = PETSC_FALSE; 1993 break; 1994 } 1995 } 1996 if (flg) { /* No break in the bs */ 1997 sizes[j] = bs; 1998 i+= bs; 1999 } else { 2000 sizes[j] = 1; 2001 i++; 2002 } 2003 } 2004 } 2005 *bs_max = j; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2011 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2012 { 2013 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,k,count,*rows; 2016 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2017 PetscScalar zero = 0.0; 2018 MatScalar *aa; 2019 2020 PetscFunctionBegin; 2021 /* Make a copy of the IS and sort it */ 2022 /* allocate memory for rows,sizes */ 2023 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2024 sizes = rows + is_n; 2025 2026 /* copy IS values to rows, and sort them */ 2027 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2028 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2029 if (baij->keepnonzeropattern) { 2030 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2031 bs_max = is_n; 2032 A->same_nonzero = PETSC_TRUE; 2033 } else { 2034 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2035 A->same_nonzero = PETSC_FALSE; 2036 } 2037 2038 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2039 row = rows[j]; 2040 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2041 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2042 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2043 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2044 if (diag != 0.0) { 2045 if (baij->ilen[row/bs] > 0) { 2046 baij->ilen[row/bs] = 1; 2047 baij->j[baij->i[row/bs]] = row/bs; 2048 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2049 } 2050 /* Now insert all the diagonal values for this bs */ 2051 for (k=0; k<bs; k++) { 2052 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2053 } 2054 } else { /* (diag == 0.0) */ 2055 baij->ilen[row/bs] = 0; 2056 } /* end (diag == 0.0) */ 2057 } else { /* (sizes[i] != bs) */ 2058 #if defined (PETSC_USE_DEBUG) 2059 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2060 #endif 2061 for (k=0; k<count; k++) { 2062 aa[0] = zero; 2063 aa += bs; 2064 } 2065 if (diag != 0.0) { 2066 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2067 } 2068 } 2069 } 2070 2071 ierr = PetscFree(rows);CHKERRQ(ierr); 2072 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2078 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2079 { 2080 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2081 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2082 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2083 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2084 PetscErrorCode ierr; 2085 PetscInt ridx,cidx,bs2=a->bs2; 2086 PetscTruth roworiented=a->roworiented; 2087 MatScalar *ap,value,*aa=a->a,*bap; 2088 2089 PetscFunctionBegin; 2090 for (k=0; k<m; k++) { /* loop over added rows */ 2091 row = im[k]; 2092 brow = row/bs; 2093 if (row < 0) continue; 2094 #if defined(PETSC_USE_DEBUG) 2095 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2096 #endif 2097 rp = aj + ai[brow]; 2098 ap = aa + bs2*ai[brow]; 2099 rmax = imax[brow]; 2100 nrow = ailen[brow]; 2101 low = 0; 2102 high = nrow; 2103 for (l=0; l<n; l++) { /* loop over added columns */ 2104 if (in[l] < 0) continue; 2105 #if defined(PETSC_USE_DEBUG) 2106 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2107 #endif 2108 col = in[l]; bcol = col/bs; 2109 ridx = row % bs; cidx = col % bs; 2110 if (roworiented) { 2111 value = v[l + k*n]; 2112 } else { 2113 value = v[k + l*m]; 2114 } 2115 if (col <= lastcol) low = 0; else high = nrow; 2116 lastcol = col; 2117 while (high-low > 7) { 2118 t = (low+high)/2; 2119 if (rp[t] > bcol) high = t; 2120 else low = t; 2121 } 2122 for (i=low; i<high; i++) { 2123 if (rp[i] > bcol) break; 2124 if (rp[i] == bcol) { 2125 bap = ap + bs2*i + bs*cidx + ridx; 2126 if (is == ADD_VALUES) *bap += value; 2127 else *bap = value; 2128 goto noinsert1; 2129 } 2130 } 2131 if (nonew == 1) goto noinsert1; 2132 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2133 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2134 N = nrow++ - 1; high++; 2135 /* shift up all the later entries in this row */ 2136 for (ii=N; ii>=i; ii--) { 2137 rp[ii+1] = rp[ii]; 2138 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2139 } 2140 if (N>=i) { 2141 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2142 } 2143 rp[i] = bcol; 2144 ap[bs2*i + bs*cidx + ridx] = value; 2145 a->nz++; 2146 noinsert1:; 2147 low = i; 2148 } 2149 ailen[brow] = nrow; 2150 } 2151 A->same_nonzero = PETSC_FALSE; 2152 PetscFunctionReturn(0); 2153 } 2154 2155 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2156 2157 #undef __FUNCT__ 2158 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2159 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2160 { 2161 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2162 Mat outA; 2163 PetscErrorCode ierr; 2164 PetscTruth row_identity,col_identity; 2165 2166 PetscFunctionBegin; 2167 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2168 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2169 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2170 if (!row_identity || !col_identity) { 2171 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2172 } 2173 2174 outA = inA; 2175 inA->factor = MAT_FACTOR_LU; 2176 2177 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2178 2179 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2180 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2181 a->row = row; 2182 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2183 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2184 a->col = col; 2185 2186 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2187 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2188 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2189 2190 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2191 if (!a->solve_work) { 2192 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2193 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2194 } 2195 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2196 2197 PetscFunctionReturn(0); 2198 } 2199 2200 EXTERN_C_BEGIN 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2203 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2204 { 2205 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2206 PetscInt i,nz,mbs; 2207 2208 PetscFunctionBegin; 2209 nz = baij->maxnz/baij->bs2; 2210 mbs = baij->mbs; 2211 for (i=0; i<nz; i++) { 2212 baij->j[i] = indices[i]; 2213 } 2214 baij->nz = nz; 2215 for (i=0; i<mbs; i++) { 2216 baij->ilen[i] = baij->imax[i]; 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 EXTERN_C_END 2221 2222 #undef __FUNCT__ 2223 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2224 /*@ 2225 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2226 in the matrix. 2227 2228 Input Parameters: 2229 + mat - the SeqBAIJ matrix 2230 - indices - the column indices 2231 2232 Level: advanced 2233 2234 Notes: 2235 This can be called if you have precomputed the nonzero structure of the 2236 matrix and want to provide it to the matrix object to improve the performance 2237 of the MatSetValues() operation. 2238 2239 You MUST have set the correct numbers of nonzeros per row in the call to 2240 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2241 2242 MUST be called before any calls to MatSetValues(); 2243 2244 @*/ 2245 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2246 { 2247 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2248 2249 PetscFunctionBegin; 2250 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2251 PetscValidPointer(indices,2); 2252 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2253 if (f) { 2254 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2255 } else { 2256 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2257 } 2258 PetscFunctionReturn(0); 2259 } 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2263 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2264 { 2265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2266 PetscErrorCode ierr; 2267 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2268 PetscReal atmp; 2269 PetscScalar *x,zero = 0.0; 2270 MatScalar *aa; 2271 PetscInt ncols,brow,krow,kcol; 2272 2273 PetscFunctionBegin; 2274 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2275 bs = A->rmap->bs; 2276 aa = a->a; 2277 ai = a->i; 2278 aj = a->j; 2279 mbs = a->mbs; 2280 2281 ierr = VecSet(v,zero);CHKERRQ(ierr); 2282 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2283 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2284 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2285 for (i=0; i<mbs; i++) { 2286 ncols = ai[1] - ai[0]; ai++; 2287 brow = bs*i; 2288 for (j=0; j<ncols; j++){ 2289 for (kcol=0; kcol<bs; kcol++){ 2290 for (krow=0; krow<bs; krow++){ 2291 atmp = PetscAbsScalar(*aa);aa++; 2292 row = brow + krow; /* row index */ 2293 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2294 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2295 } 2296 } 2297 aj++; 2298 } 2299 } 2300 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatCopy_SeqBAIJ" 2306 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2307 { 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 /* If the two matrices have the same copy implementation, use fast copy. */ 2312 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2314 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2315 2316 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2317 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2318 } 2319 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2320 } else { 2321 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2322 } 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #undef __FUNCT__ 2327 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2328 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2329 { 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2339 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2340 { 2341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2342 PetscFunctionBegin; 2343 *array = a->a; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2349 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2350 { 2351 PetscFunctionBegin; 2352 PetscFunctionReturn(0); 2353 } 2354 2355 #include "petscblaslapack.h" 2356 #undef __FUNCT__ 2357 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2358 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2359 { 2360 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2361 PetscErrorCode ierr; 2362 PetscInt i,bs=Y->rmap->bs,j,bs2; 2363 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2364 2365 PetscFunctionBegin; 2366 if (str == SAME_NONZERO_PATTERN) { 2367 PetscScalar alpha = a; 2368 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2369 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2370 if (y->xtoy && y->XtoY != X) { 2371 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2372 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2373 } 2374 if (!y->xtoy) { /* get xtoy */ 2375 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2376 y->XtoY = X; 2377 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2378 } 2379 bs2 = bs*bs; 2380 for (i=0; i<x->nz; i++) { 2381 j = 0; 2382 while (j < bs2){ 2383 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2384 j++; 2385 } 2386 } 2387 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2388 } else { 2389 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2396 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2397 { 2398 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2399 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2400 MatScalar *aa = a->a; 2401 2402 PetscFunctionBegin; 2403 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2409 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2410 { 2411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2412 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2413 MatScalar *aa = a->a; 2414 2415 PetscFunctionBegin; 2416 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2421 2422 #undef __FUNCT__ 2423 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2424 /* 2425 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2426 */ 2427 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2428 { 2429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2430 PetscErrorCode ierr; 2431 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2432 PetscInt nz = a->i[m],row,*jj,mr,col; 2433 2434 PetscFunctionBegin; 2435 *nn = n; 2436 if (!ia) PetscFunctionReturn(0); 2437 if (symmetric) { 2438 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2439 } else { 2440 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2441 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2442 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2443 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2444 jj = a->j; 2445 for (i=0; i<nz; i++) { 2446 collengths[jj[i]]++; 2447 } 2448 cia[0] = oshift; 2449 for (i=0; i<n; i++) { 2450 cia[i+1] = cia[i] + collengths[i]; 2451 } 2452 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2453 jj = a->j; 2454 for (row=0; row<m; row++) { 2455 mr = a->i[row+1] - a->i[row]; 2456 for (i=0; i<mr; i++) { 2457 col = *jj++; 2458 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2459 } 2460 } 2461 ierr = PetscFree(collengths);CHKERRQ(ierr); 2462 *ia = cia; *ja = cja; 2463 } 2464 PetscFunctionReturn(0); 2465 } 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2469 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2470 { 2471 PetscErrorCode ierr; 2472 2473 PetscFunctionBegin; 2474 if (!ia) PetscFunctionReturn(0); 2475 ierr = PetscFree(*ia);CHKERRQ(ierr); 2476 ierr = PetscFree(*ja);CHKERRQ(ierr); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2482 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2483 { 2484 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2485 PetscErrorCode ierr; 2486 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2487 PetscScalar dx,*y,*xx,*w3_array; 2488 PetscScalar *vscale_array; 2489 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2490 Vec w1=coloring->w1,w2=coloring->w2,w3; 2491 void *fctx = coloring->fctx; 2492 PetscTruth flg = PETSC_FALSE; 2493 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2494 Vec x1_tmp; 2495 2496 PetscFunctionBegin; 2497 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2498 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2499 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2500 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2501 2502 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2503 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2504 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2505 if (flg) { 2506 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2507 } else { 2508 PetscTruth assembled; 2509 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2510 if (assembled) { 2511 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2512 } 2513 } 2514 2515 x1_tmp = x1; 2516 if (!coloring->vscale){ 2517 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2518 } 2519 2520 /* 2521 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2522 coloring->F for the coarser grids from the finest 2523 */ 2524 if (coloring->F) { 2525 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2526 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2527 if (m1 != m2) { 2528 coloring->F = 0; 2529 } 2530 } 2531 2532 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2533 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2534 } 2535 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2536 2537 /* Set w1 = F(x1) */ 2538 if (coloring->F) { 2539 w1 = coloring->F; /* use already computed value of function */ 2540 coloring->F = 0; 2541 } else { 2542 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2543 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2544 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2545 } 2546 2547 if (!coloring->w3) { 2548 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2549 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2550 } 2551 w3 = coloring->w3; 2552 2553 CHKMEMQ; 2554 /* Compute all the local scale factors, including ghost points */ 2555 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2556 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2557 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2558 if (ctype == IS_COLORING_GHOSTED){ 2559 col_start = 0; col_end = N; 2560 } else if (ctype == IS_COLORING_GLOBAL){ 2561 xx = xx - start; 2562 vscale_array = vscale_array - start; 2563 col_start = start; col_end = N + start; 2564 } CHKMEMQ; 2565 for (col=col_start; col<col_end; col++){ 2566 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2567 if (coloring->htype[0] == 'w') { 2568 dx = 1.0 + unorm; 2569 } else { 2570 dx = xx[col]; 2571 } 2572 if (dx == 0.0) dx = 1.0; 2573 #if !defined(PETSC_USE_COMPLEX) 2574 if (dx < umin && dx >= 0.0) dx = umin; 2575 else if (dx < 0.0 && dx > -umin) dx = -umin; 2576 #else 2577 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2578 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2579 #endif 2580 dx *= epsilon; 2581 vscale_array[col] = 1.0/dx; 2582 } CHKMEMQ; 2583 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2584 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2585 if (ctype == IS_COLORING_GLOBAL){ 2586 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2587 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2588 } 2589 CHKMEMQ; 2590 if (coloring->vscaleforrow) { 2591 vscaleforrow = coloring->vscaleforrow; 2592 } else { 2593 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2594 } 2595 2596 2597 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2598 /* 2599 Loop over each color 2600 */ 2601 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2602 for (k=0; k<coloring->ncolors; k++) { 2603 coloring->currentcolor = k; 2604 for (i=0; i<bs; i++) { 2605 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2606 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2607 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2608 /* 2609 Loop over each column associated with color 2610 adding the perturbation to the vector w3. 2611 */ 2612 for (l=0; l<coloring->ncolumns[k]; l++) { 2613 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2614 if (coloring->htype[0] == 'w') { 2615 dx = 1.0 + unorm; 2616 } else { 2617 dx = xx[col]; 2618 } 2619 if (dx == 0.0) dx = 1.0; 2620 #if !defined(PETSC_USE_COMPLEX) 2621 if (dx < umin && dx >= 0.0) dx = umin; 2622 else if (dx < 0.0 && dx > -umin) dx = -umin; 2623 #else 2624 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2625 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2626 #endif 2627 dx *= epsilon; 2628 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2629 w3_array[col] += dx; 2630 } 2631 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2632 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2633 2634 /* 2635 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2636 w2 = F(x1 + dx) - F(x1) 2637 */ 2638 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2639 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2640 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2641 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2642 2643 /* 2644 Loop over rows of vector, putting results into Jacobian matrix 2645 */ 2646 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2647 for (l=0; l<coloring->nrows[k]; l++) { 2648 row = bs*coloring->rows[k][l]; /* local row index */ 2649 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2650 for (j=0; j<bs; j++) { 2651 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2652 srows[j] = row + start + j; 2653 } 2654 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2655 } 2656 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2657 } 2658 } /* endof for each color */ 2659 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2660 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2661 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2662 ierr = PetscFree(srows);CHKERRQ(ierr); 2663 2664 coloring->currentcolor = -1; 2665 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2666 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2667 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2668 PetscFunctionReturn(0); 2669 } 2670 2671 /* -------------------------------------------------------------------*/ 2672 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2673 MatGetRow_SeqBAIJ, 2674 MatRestoreRow_SeqBAIJ, 2675 MatMult_SeqBAIJ_N, 2676 /* 4*/ MatMultAdd_SeqBAIJ_N, 2677 MatMultTranspose_SeqBAIJ, 2678 MatMultTransposeAdd_SeqBAIJ, 2679 0, 2680 0, 2681 0, 2682 /*10*/ 0, 2683 MatLUFactor_SeqBAIJ, 2684 0, 2685 0, 2686 MatTranspose_SeqBAIJ, 2687 /*15*/ MatGetInfo_SeqBAIJ, 2688 MatEqual_SeqBAIJ, 2689 MatGetDiagonal_SeqBAIJ, 2690 MatDiagonalScale_SeqBAIJ, 2691 MatNorm_SeqBAIJ, 2692 /*20*/ 0, 2693 MatAssemblyEnd_SeqBAIJ, 2694 MatSetOption_SeqBAIJ, 2695 MatZeroEntries_SeqBAIJ, 2696 /*24*/ MatZeroRows_SeqBAIJ, 2697 0, 2698 0, 2699 0, 2700 0, 2701 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2702 0, 2703 0, 2704 MatGetArray_SeqBAIJ, 2705 MatRestoreArray_SeqBAIJ, 2706 /*34*/ MatDuplicate_SeqBAIJ, 2707 0, 2708 0, 2709 MatILUFactor_SeqBAIJ, 2710 0, 2711 /*39*/ MatAXPY_SeqBAIJ, 2712 MatGetSubMatrices_SeqBAIJ, 2713 MatIncreaseOverlap_SeqBAIJ, 2714 MatGetValues_SeqBAIJ, 2715 MatCopy_SeqBAIJ, 2716 /*44*/ 0, 2717 MatScale_SeqBAIJ, 2718 0, 2719 0, 2720 MatILUDTFactor_SeqBAIJ, 2721 /*49*/ 0, 2722 MatGetRowIJ_SeqBAIJ, 2723 MatRestoreRowIJ_SeqBAIJ, 2724 MatGetColumnIJ_SeqBAIJ, 2725 MatRestoreColumnIJ_SeqBAIJ, 2726 /*54*/ MatFDColoringCreate_SeqAIJ, 2727 0, 2728 0, 2729 0, 2730 MatSetValuesBlocked_SeqBAIJ, 2731 /*59*/ MatGetSubMatrix_SeqBAIJ, 2732 MatDestroy_SeqBAIJ, 2733 MatView_SeqBAIJ, 2734 0, 2735 0, 2736 /*64*/ 0, 2737 0, 2738 0, 2739 0, 2740 0, 2741 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2742 0, 2743 MatConvert_Basic, 2744 0, 2745 0, 2746 /*74*/ 0, 2747 MatFDColoringApply_BAIJ, 2748 0, 2749 0, 2750 0, 2751 /*79*/ 0, 2752 0, 2753 0, 2754 0, 2755 MatLoad_SeqBAIJ, 2756 /*84*/ 0, 2757 0, 2758 0, 2759 0, 2760 0, 2761 /*89*/ 0, 2762 0, 2763 0, 2764 0, 2765 0, 2766 /*94*/ 0, 2767 0, 2768 0, 2769 0, 2770 0, 2771 /*99*/0, 2772 0, 2773 0, 2774 0, 2775 0, 2776 /*104*/0, 2777 MatRealPart_SeqBAIJ, 2778 MatImaginaryPart_SeqBAIJ, 2779 0, 2780 0, 2781 /*109*/0, 2782 0, 2783 0, 2784 0, 2785 MatMissingDiagonal_SeqBAIJ 2786 /*114*/ 2787 }; 2788 2789 EXTERN_C_BEGIN 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2792 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2793 { 2794 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2795 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 if (aij->nonew != 1) { 2800 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2801 } 2802 2803 /* allocate space for values if not already there */ 2804 if (!aij->saved_values) { 2805 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2806 } 2807 2808 /* copy values over */ 2809 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2810 PetscFunctionReturn(0); 2811 } 2812 EXTERN_C_END 2813 2814 EXTERN_C_BEGIN 2815 #undef __FUNCT__ 2816 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2817 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2818 { 2819 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2820 PetscErrorCode ierr; 2821 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2822 2823 PetscFunctionBegin; 2824 if (aij->nonew != 1) { 2825 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2826 } 2827 if (!aij->saved_values) { 2828 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2829 } 2830 2831 /* copy values over */ 2832 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 EXTERN_C_END 2836 2837 EXTERN_C_BEGIN 2838 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2839 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2840 EXTERN_C_END 2841 2842 EXTERN_C_BEGIN 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2845 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2846 { 2847 Mat_SeqBAIJ *b; 2848 PetscErrorCode ierr; 2849 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2850 PetscTruth flg,skipallocation = PETSC_FALSE; 2851 2852 PetscFunctionBegin; 2853 2854 if (nz == MAT_SKIP_ALLOCATION) { 2855 skipallocation = PETSC_TRUE; 2856 nz = 0; 2857 } 2858 2859 if (bs < 0) { 2860 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2861 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2862 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2863 bs = PetscAbs(bs); 2864 } 2865 if (nnz && newbs != bs) { 2866 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2867 } 2868 bs = newbs; 2869 2870 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2871 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2872 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2873 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2874 2875 B->preallocated = PETSC_TRUE; 2876 2877 mbs = B->rmap->n/bs; 2878 nbs = B->cmap->n/bs; 2879 bs2 = bs*bs; 2880 2881 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2882 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2883 } 2884 2885 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2886 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2887 if (nnz) { 2888 for (i=0; i<mbs; i++) { 2889 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2890 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2891 } 2892 } 2893 2894 b = (Mat_SeqBAIJ*)B->data; 2895 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2896 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2897 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2898 2899 if (!flg) { 2900 switch (bs) { 2901 case 1: 2902 B->ops->mult = MatMult_SeqBAIJ_1; 2903 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2904 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2905 break; 2906 case 2: 2907 B->ops->mult = MatMult_SeqBAIJ_2; 2908 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2909 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2910 break; 2911 case 3: 2912 B->ops->mult = MatMult_SeqBAIJ_3; 2913 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2914 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2915 break; 2916 case 4: 2917 B->ops->mult = MatMult_SeqBAIJ_4; 2918 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2919 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2920 break; 2921 case 5: 2922 B->ops->mult = MatMult_SeqBAIJ_5; 2923 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2924 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2925 break; 2926 case 6: 2927 B->ops->mult = MatMult_SeqBAIJ_6; 2928 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2929 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2930 break; 2931 case 7: 2932 B->ops->mult = MatMult_SeqBAIJ_7; 2933 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2934 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2935 break; 2936 default: 2937 B->ops->mult = MatMult_SeqBAIJ_N; 2938 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2939 break; 2940 } 2941 } 2942 B->rmap->bs = bs; 2943 b->mbs = mbs; 2944 b->nbs = nbs; 2945 if (!skipallocation) { 2946 if (!b->imax) { 2947 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2948 } 2949 /* b->ilen will count nonzeros in each block row so far. */ 2950 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2951 if (!nnz) { 2952 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2953 else if (nz <= 0) nz = 1; 2954 for (i=0; i<mbs; i++) b->imax[i] = nz; 2955 nz = nz*mbs; 2956 } else { 2957 nz = 0; 2958 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2959 } 2960 2961 /* allocate the matrix space */ 2962 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2963 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2964 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2965 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2966 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2967 b->singlemalloc = PETSC_TRUE; 2968 b->i[0] = 0; 2969 for (i=1; i<mbs+1; i++) { 2970 b->i[i] = b->i[i-1] + b->imax[i-1]; 2971 } 2972 b->free_a = PETSC_TRUE; 2973 b->free_ij = PETSC_TRUE; 2974 } else { 2975 b->free_a = PETSC_FALSE; 2976 b->free_ij = PETSC_FALSE; 2977 } 2978 2979 B->rmap->bs = bs; 2980 b->bs2 = bs2; 2981 b->mbs = mbs; 2982 b->nz = 0; 2983 b->maxnz = nz*bs2; 2984 B->info.nz_unneeded = (PetscReal)b->maxnz; 2985 PetscFunctionReturn(0); 2986 } 2987 EXTERN_C_END 2988 2989 EXTERN_C_BEGIN 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2992 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2993 { 2994 PetscInt i,m,nz,nz_max=0,*nnz; 2995 PetscScalar *values=0; 2996 PetscErrorCode ierr; 2997 2998 PetscFunctionBegin; 2999 3000 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3001 3002 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3003 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3004 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3005 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3006 m = B->rmap->n/bs; 3007 3008 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3009 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3010 for(i=0; i<m; i++) { 3011 nz = ii[i+1]- ii[i]; 3012 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3013 nz_max = PetscMax(nz_max, nz); 3014 nnz[i] = nz; 3015 } 3016 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3017 ierr = PetscFree(nnz);CHKERRQ(ierr); 3018 3019 values = (PetscScalar*)V; 3020 if (!values) { 3021 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3022 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3023 } 3024 for (i=0; i<m; i++) { 3025 PetscInt ncols = ii[i+1] - ii[i]; 3026 const PetscInt *icols = jj + ii[i]; 3027 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3028 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3029 } 3030 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3031 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 3034 PetscFunctionReturn(0); 3035 } 3036 EXTERN_C_END 3037 3038 3039 EXTERN_C_BEGIN 3040 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3041 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3042 EXTERN_C_END 3043 3044 /*MC 3045 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3046 block sparse compressed row format. 3047 3048 Options Database Keys: 3049 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3050 3051 Level: beginner 3052 3053 .seealso: MatCreateSeqBAIJ() 3054 M*/ 3055 3056 3057 EXTERN_C_BEGIN 3058 #undef __FUNCT__ 3059 #define __FUNCT__ "MatCreate_SeqBAIJ" 3060 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3061 { 3062 PetscErrorCode ierr; 3063 PetscMPIInt size; 3064 Mat_SeqBAIJ *b; 3065 3066 PetscFunctionBegin; 3067 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3068 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3069 3070 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3071 B->data = (void*)b; 3072 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3073 B->mapping = 0; 3074 b->row = 0; 3075 b->col = 0; 3076 b->icol = 0; 3077 b->reallocs = 0; 3078 b->saved_values = 0; 3079 3080 b->roworiented = PETSC_TRUE; 3081 b->nonew = 0; 3082 b->diag = 0; 3083 b->solve_work = 0; 3084 b->mult_work = 0; 3085 B->spptr = 0; 3086 B->info.nz_unneeded = (PetscReal)b->maxnz; 3087 b->keepnonzeropattern = PETSC_FALSE; 3088 b->xtoy = 0; 3089 b->XtoY = 0; 3090 b->compressedrow.use = PETSC_FALSE; 3091 b->compressedrow.nrows = 0; 3092 b->compressedrow.i = PETSC_NULL; 3093 b->compressedrow.rindex = PETSC_NULL; 3094 b->compressedrow.checked = PETSC_FALSE; 3095 B->same_nonzero = PETSC_FALSE; 3096 3097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 3098 "MatGetFactorAvailable_seqbaij_petsc", 3099 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 3101 "MatGetFactor_seqbaij_petsc", 3102 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3104 "MatInvertBlockDiagonal_SeqBAIJ", 3105 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3107 "MatStoreValues_SeqBAIJ", 3108 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3110 "MatRetrieveValues_SeqBAIJ", 3111 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3113 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3114 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3116 "MatConvert_SeqBAIJ_SeqAIJ", 3117 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3118 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3119 "MatConvert_SeqBAIJ_SeqSBAIJ", 3120 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3121 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3122 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3123 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3124 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3125 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3126 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3127 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3128 PetscFunctionReturn(0); 3129 } 3130 EXTERN_C_END 3131 3132 #undef __FUNCT__ 3133 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3134 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3135 { 3136 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3137 PetscErrorCode ierr; 3138 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3139 3140 PetscFunctionBegin; 3141 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3142 3143 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3144 for (i=0; i<mbs; i++) { 3145 c->imax[i] = a->imax[i]; 3146 c->ilen[i] = a->ilen[i]; 3147 } 3148 3149 /* allocate the matrix space */ 3150 if (mallocmatspace){ 3151 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3152 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3153 c->singlemalloc = PETSC_TRUE; 3154 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3155 if (mbs > 0) { 3156 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3157 if (cpvalues == MAT_COPY_VALUES) { 3158 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3159 } else { 3160 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3161 } 3162 } 3163 } 3164 3165 c->roworiented = a->roworiented; 3166 c->nonew = a->nonew; 3167 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 3168 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 3169 c->bs2 = a->bs2; 3170 c->mbs = a->mbs; 3171 c->nbs = a->nbs; 3172 3173 if (a->diag) { 3174 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3175 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3176 for (i=0; i<mbs; i++) { 3177 c->diag[i] = a->diag[i]; 3178 } 3179 } else c->diag = 0; 3180 c->nz = a->nz; 3181 c->maxnz = a->maxnz; 3182 c->solve_work = 0; 3183 c->mult_work = 0; 3184 c->free_a = PETSC_TRUE; 3185 c->free_ij = PETSC_TRUE; 3186 C->preallocated = PETSC_TRUE; 3187 C->assembled = PETSC_TRUE; 3188 3189 c->compressedrow.use = a->compressedrow.use; 3190 c->compressedrow.nrows = a->compressedrow.nrows; 3191 c->compressedrow.checked = a->compressedrow.checked; 3192 if ( a->compressedrow.checked && a->compressedrow.use){ 3193 i = a->compressedrow.nrows; 3194 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 3195 c->compressedrow.rindex = c->compressedrow.i + i + 1; 3196 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3197 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3198 } else { 3199 c->compressedrow.use = PETSC_FALSE; 3200 c->compressedrow.i = PETSC_NULL; 3201 c->compressedrow.rindex = PETSC_NULL; 3202 } 3203 C->same_nonzero = A->same_nonzero; 3204 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3205 PetscFunctionReturn(0); 3206 } 3207 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3210 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3211 { 3212 PetscErrorCode ierr; 3213 3214 PetscFunctionBegin; 3215 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3216 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3217 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3218 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3219 PetscFunctionReturn(0); 3220 } 3221 3222 #undef __FUNCT__ 3223 #define __FUNCT__ "MatLoad_SeqBAIJ" 3224 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3225 { 3226 Mat_SeqBAIJ *a; 3227 Mat B; 3228 PetscErrorCode ierr; 3229 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3230 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3231 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3232 PetscInt *masked,nmask,tmp,bs2,ishift; 3233 PetscMPIInt size; 3234 int fd; 3235 PetscScalar *aa; 3236 MPI_Comm comm = ((PetscObject)viewer)->comm; 3237 3238 PetscFunctionBegin; 3239 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3240 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3241 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3242 bs2 = bs*bs; 3243 3244 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3245 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3246 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3247 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3248 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3249 M = header[1]; N = header[2]; nz = header[3]; 3250 3251 if (header[3] < 0) { 3252 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3253 } 3254 3255 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3256 3257 /* 3258 This code adds extra rows to make sure the number of rows is 3259 divisible by the blocksize 3260 */ 3261 mbs = M/bs; 3262 extra_rows = bs - M + bs*(mbs); 3263 if (extra_rows == bs) extra_rows = 0; 3264 else mbs++; 3265 if (extra_rows) { 3266 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3267 } 3268 3269 /* read in row lengths */ 3270 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3271 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3272 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3273 3274 /* read in column indices */ 3275 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3276 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3277 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3278 3279 /* loop over row lengths determining block row lengths */ 3280 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3281 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3282 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3283 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3284 masked = mask + mbs; 3285 rowcount = 0; nzcount = 0; 3286 for (i=0; i<mbs; i++) { 3287 nmask = 0; 3288 for (j=0; j<bs; j++) { 3289 kmax = rowlengths[rowcount]; 3290 for (k=0; k<kmax; k++) { 3291 tmp = jj[nzcount++]/bs; 3292 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3293 } 3294 rowcount++; 3295 } 3296 browlengths[i] += nmask; 3297 /* zero out the mask elements we set */ 3298 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3299 } 3300 3301 /* create our matrix */ 3302 ierr = MatCreate(comm,&B); 3303 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3304 ierr = MatSetType(B,type);CHKERRQ(ierr); 3305 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3306 a = (Mat_SeqBAIJ*)B->data; 3307 3308 /* set matrix "i" values */ 3309 a->i[0] = 0; 3310 for (i=1; i<= mbs; i++) { 3311 a->i[i] = a->i[i-1] + browlengths[i-1]; 3312 a->ilen[i-1] = browlengths[i-1]; 3313 } 3314 a->nz = 0; 3315 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3316 3317 /* read in nonzero values */ 3318 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3319 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3320 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3321 3322 /* set "a" and "j" values into matrix */ 3323 nzcount = 0; jcount = 0; 3324 for (i=0; i<mbs; i++) { 3325 nzcountb = nzcount; 3326 nmask = 0; 3327 for (j=0; j<bs; j++) { 3328 kmax = rowlengths[i*bs+j]; 3329 for (k=0; k<kmax; k++) { 3330 tmp = jj[nzcount++]/bs; 3331 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3332 } 3333 } 3334 /* sort the masked values */ 3335 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3336 3337 /* set "j" values into matrix */ 3338 maskcount = 1; 3339 for (j=0; j<nmask; j++) { 3340 a->j[jcount++] = masked[j]; 3341 mask[masked[j]] = maskcount++; 3342 } 3343 /* set "a" values into matrix */ 3344 ishift = bs2*a->i[i]; 3345 for (j=0; j<bs; j++) { 3346 kmax = rowlengths[i*bs+j]; 3347 for (k=0; k<kmax; k++) { 3348 tmp = jj[nzcountb]/bs ; 3349 block = mask[tmp] - 1; 3350 point = jj[nzcountb] - bs*tmp; 3351 idx = ishift + bs2*block + j + bs*point; 3352 a->a[idx] = (MatScalar)aa[nzcountb++]; 3353 } 3354 } 3355 /* zero out the mask elements we set */ 3356 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3357 } 3358 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3359 3360 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3361 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3362 ierr = PetscFree(aa);CHKERRQ(ierr); 3363 ierr = PetscFree(jj);CHKERRQ(ierr); 3364 ierr = PetscFree(mask);CHKERRQ(ierr); 3365 3366 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3367 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3368 ierr = MatView_Private(B);CHKERRQ(ierr); 3369 3370 *A = B; 3371 PetscFunctionReturn(0); 3372 } 3373 3374 #undef __FUNCT__ 3375 #define __FUNCT__ "MatCreateSeqBAIJ" 3376 /*@C 3377 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3378 compressed row) format. For good matrix assembly performance the 3379 user should preallocate the matrix storage by setting the parameter nz 3380 (or the array nnz). By setting these parameters accurately, performance 3381 during matrix assembly can be increased by more than a factor of 50. 3382 3383 Collective on MPI_Comm 3384 3385 Input Parameters: 3386 + comm - MPI communicator, set to PETSC_COMM_SELF 3387 . bs - size of block 3388 . m - number of rows 3389 . n - number of columns 3390 . nz - number of nonzero blocks per block row (same for all rows) 3391 - nnz - array containing the number of nonzero blocks in the various block rows 3392 (possibly different for each block row) or PETSC_NULL 3393 3394 Output Parameter: 3395 . A - the matrix 3396 3397 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3398 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3399 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3400 3401 Options Database Keys: 3402 . -mat_no_unroll - uses code that does not unroll the loops in the 3403 block calculations (much slower) 3404 . -mat_block_size - size of the blocks to use 3405 3406 Level: intermediate 3407 3408 Notes: 3409 The number of rows and columns must be divisible by blocksize. 3410 3411 If the nnz parameter is given then the nz parameter is ignored 3412 3413 A nonzero block is any block that as 1 or more nonzeros in it 3414 3415 The block AIJ format is fully compatible with standard Fortran 77 3416 storage. That is, the stored row and column indices can begin at 3417 either one (as in Fortran) or zero. See the users' manual for details. 3418 3419 Specify the preallocated storage with either nz or nnz (not both). 3420 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3421 allocation. For additional details, see the users manual chapter on 3422 matrices. 3423 3424 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3425 @*/ 3426 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3427 { 3428 PetscErrorCode ierr; 3429 3430 PetscFunctionBegin; 3431 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3432 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3433 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3434 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3435 PetscFunctionReturn(0); 3436 } 3437 3438 #undef __FUNCT__ 3439 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3440 /*@C 3441 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3442 per row in the matrix. For good matrix assembly performance the 3443 user should preallocate the matrix storage by setting the parameter nz 3444 (or the array nnz). By setting these parameters accurately, performance 3445 during matrix assembly can be increased by more than a factor of 50. 3446 3447 Collective on MPI_Comm 3448 3449 Input Parameters: 3450 + A - the matrix 3451 . bs - size of block 3452 . nz - number of block nonzeros per block row (same for all rows) 3453 - nnz - array containing the number of block nonzeros in the various block rows 3454 (possibly different for each block row) or PETSC_NULL 3455 3456 Options Database Keys: 3457 . -mat_no_unroll - uses code that does not unroll the loops in the 3458 block calculations (much slower) 3459 . -mat_block_size - size of the blocks to use 3460 3461 Level: intermediate 3462 3463 Notes: 3464 If the nnz parameter is given then the nz parameter is ignored 3465 3466 You can call MatGetInfo() to get information on how effective the preallocation was; 3467 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3468 You can also run with the option -info and look for messages with the string 3469 malloc in them to see if additional memory allocation was needed. 3470 3471 The block AIJ format is fully compatible with standard Fortran 77 3472 storage. That is, the stored row and column indices can begin at 3473 either one (as in Fortran) or zero. See the users' manual for details. 3474 3475 Specify the preallocated storage with either nz or nnz (not both). 3476 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3477 allocation. For additional details, see the users manual chapter on 3478 matrices. 3479 3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3481 @*/ 3482 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3483 { 3484 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3485 3486 PetscFunctionBegin; 3487 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3488 if (f) { 3489 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3490 } 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3496 /*@C 3497 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3498 (the default sequential PETSc format). 3499 3500 Collective on MPI_Comm 3501 3502 Input Parameters: 3503 + A - the matrix 3504 . i - the indices into j for the start of each local row (starts with zero) 3505 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3506 - v - optional values in the matrix 3507 3508 Level: developer 3509 3510 .keywords: matrix, aij, compressed row, sparse 3511 3512 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3513 @*/ 3514 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3515 { 3516 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3517 3518 PetscFunctionBegin; 3519 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3520 if (f) { 3521 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3522 } 3523 PetscFunctionReturn(0); 3524 } 3525 3526 3527 #undef __FUNCT__ 3528 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3529 /*@ 3530 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3531 (upper triangular entries in CSR format) provided by the user. 3532 3533 Collective on MPI_Comm 3534 3535 Input Parameters: 3536 + comm - must be an MPI communicator of size 1 3537 . bs - size of block 3538 . m - number of rows 3539 . n - number of columns 3540 . i - row indices 3541 . j - column indices 3542 - a - matrix values 3543 3544 Output Parameter: 3545 . mat - the matrix 3546 3547 Level: intermediate 3548 3549 Notes: 3550 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3551 once the matrix is destroyed 3552 3553 You cannot set new nonzero locations into this matrix, that will generate an error. 3554 3555 The i and j indices are 0 based 3556 3557 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3558 3559 @*/ 3560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3561 { 3562 PetscErrorCode ierr; 3563 PetscInt ii; 3564 Mat_SeqBAIJ *baij; 3565 3566 PetscFunctionBegin; 3567 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3568 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3569 3570 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3571 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3572 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3573 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3574 baij = (Mat_SeqBAIJ*)(*mat)->data; 3575 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3576 3577 baij->i = i; 3578 baij->j = j; 3579 baij->a = a; 3580 baij->singlemalloc = PETSC_FALSE; 3581 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3582 baij->free_a = PETSC_FALSE; 3583 baij->free_ij = PETSC_FALSE; 3584 3585 for (ii=0; ii<m; ii++) { 3586 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3587 #if defined(PETSC_USE_DEBUG) 3588 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3589 #endif 3590 } 3591 #if defined(PETSC_USE_DEBUG) 3592 for (ii=0; ii<baij->i[m]; ii++) { 3593 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3594 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3595 } 3596 #endif 3597 3598 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3599 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3600 PetscFunctionReturn(0); 3601 } 3602