1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 default: 129 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 PetscScalar *x,x1,s1; 142 const PetscScalar *b; 143 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 144 PetscErrorCode ierr; 145 PetscInt m = a->mbs,i,i2,nz,idx; 146 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 147 148 PetscFunctionBegin; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 its = its*lits; 151 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 152 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 153 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 154 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 155 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 156 157 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 158 159 diag = a->diag; 160 idiag = a->idiag; 161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 162 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 163 164 if (flag & SOR_ZERO_INITIAL_GUESS) { 165 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 166 x[0] = b[0]*idiag[0]; 167 i2 = 1; 168 idiag += 1; 169 for (i=1; i<m; i++) { 170 v = aa + ai[i]; 171 vi = aj + ai[i]; 172 nz = diag[i] - ai[i]; 173 s1 = b[i2]; 174 while (nz--) { 175 idx = (*vi++); 176 x1 = x[idx]; 177 s1 -= v[0]*x1; 178 v += 1; 179 } 180 x[i2] = idiag[0]*s1; 181 idiag += 1; 182 i2 += 1; 183 } 184 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 185 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 186 } 187 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 188 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 189 i2 = 0; 190 mdiag = a->idiag+a->mbs; 191 for (i=0; i<m; i++) { 192 x1 = x[i2]; 193 x[i2] = mdiag[0]*x1; 194 mdiag += 1; 195 i2 += 1; 196 } 197 ierr = PetscLogFlops(m);CHKERRQ(ierr); 198 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 199 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 200 } 201 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 202 idiag = a->idiag+a->mbs - 1; 203 i2 = m - 1; 204 x1 = x[i2]; 205 x[i2] = idiag[0]*x1; 206 idiag -= 1; 207 i2 -= 1; 208 for (i=m-2; i>=0; i--) { 209 v = aa + (diag[i]+1); 210 vi = aj + diag[i] + 1; 211 nz = ai[i+1] - diag[i] - 1; 212 s1 = x[i2]; 213 while (nz--) { 214 idx = (*vi++); 215 x1 = x[idx]; 216 s1 -= v[0]*x1; 217 v += 1; 218 } 219 x[i2] = idiag[0]*s1; 220 idiag -= 1; 221 i2 -= 1; 222 } 223 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 224 } 225 } else { 226 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 227 } 228 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 229 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 236 { 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238 PetscScalar *x,x1,x2,s1,s2; 239 const PetscScalar *b; 240 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 241 PetscErrorCode ierr; 242 PetscInt m = a->mbs,i,i2,nz,idx; 243 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 244 245 PetscFunctionBegin; 246 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 247 its = its*lits; 248 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 249 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 250 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 251 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 252 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 253 254 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 255 256 diag = a->diag; 257 idiag = a->idiag; 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 260 261 if (flag & SOR_ZERO_INITIAL_GUESS) { 262 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 263 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 264 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 265 i2 = 2; 266 idiag += 4; 267 for (i=1; i<m; i++) { 268 v = aa + 4*ai[i]; 269 vi = aj + ai[i]; 270 nz = diag[i] - ai[i]; 271 s1 = b[i2]; s2 = b[i2+1]; 272 while (nz--) { 273 idx = 2*(*vi++); 274 x1 = x[idx]; x2 = x[1+idx]; 275 s1 -= v[0]*x1 + v[2]*x2; 276 s2 -= v[1]*x1 + v[3]*x2; 277 v += 4; 278 } 279 x[i2] = idiag[0]*s1 + idiag[2]*s2; 280 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 281 idiag += 4; 282 i2 += 2; 283 } 284 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 285 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 286 } 287 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 288 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 289 i2 = 0; 290 mdiag = a->idiag+4*a->mbs; 291 for (i=0; i<m; i++) { 292 x1 = x[i2]; x2 = x[i2+1]; 293 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 294 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 295 mdiag += 4; 296 i2 += 2; 297 } 298 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 299 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 300 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 301 } 302 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 303 idiag = a->idiag+4*a->mbs - 4; 304 i2 = 2*m - 2; 305 x1 = x[i2]; x2 = x[i2+1]; 306 x[i2] = idiag[0]*x1 + idiag[2]*x2; 307 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 308 idiag -= 4; 309 i2 -= 2; 310 for (i=m-2; i>=0; i--) { 311 v = aa + 4*(diag[i]+1); 312 vi = aj + diag[i] + 1; 313 nz = ai[i+1] - diag[i] - 1; 314 s1 = x[i2]; s2 = x[i2+1]; 315 while (nz--) { 316 idx = 2*(*vi++); 317 x1 = x[idx]; x2 = x[1+idx]; 318 s1 -= v[0]*x1 + v[2]*x2; 319 s2 -= v[1]*x1 + v[3]*x2; 320 v += 4; 321 } 322 x[i2] = idiag[0]*s1 + idiag[2]*s2; 323 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 324 idiag -= 4; 325 i2 -= 2; 326 } 327 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 328 } 329 } else { 330 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 331 } 332 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 333 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 340 { 341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 342 PetscScalar *x,x1,x2,x3,s1,s2,s3; 343 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 344 const PetscScalar *b; 345 PetscErrorCode ierr; 346 PetscInt m = a->mbs,i,i2,nz,idx; 347 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 348 349 PetscFunctionBegin; 350 its = its*lits; 351 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 352 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 353 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 354 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 355 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 356 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 357 358 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 359 360 diag = a->diag; 361 idiag = a->idiag; 362 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 363 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 364 365 if (flag & SOR_ZERO_INITIAL_GUESS) { 366 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 367 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 368 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 369 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 370 i2 = 3; 371 idiag += 9; 372 for (i=1; i<m; i++) { 373 v = aa + 9*ai[i]; 374 vi = aj + ai[i]; 375 nz = diag[i] - ai[i]; 376 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 377 while (nz--) { 378 idx = 3*(*vi++); 379 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 380 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 381 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 382 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 383 v += 9; 384 } 385 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 386 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 387 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 388 idiag += 9; 389 i2 += 3; 390 } 391 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 392 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 393 } 394 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 395 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 396 i2 = 0; 397 mdiag = a->idiag+9*a->mbs; 398 for (i=0; i<m; i++) { 399 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 400 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 401 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 402 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 403 mdiag += 9; 404 i2 += 3; 405 } 406 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 407 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 408 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 409 } 410 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 411 idiag = a->idiag+9*a->mbs - 9; 412 i2 = 3*m - 3; 413 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 414 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 415 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 416 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 417 idiag -= 9; 418 i2 -= 3; 419 for (i=m-2; i>=0; i--) { 420 v = aa + 9*(diag[i]+1); 421 vi = aj + diag[i] + 1; 422 nz = ai[i+1] - diag[i] - 1; 423 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 424 while (nz--) { 425 idx = 3*(*vi++); 426 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 427 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 428 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 429 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 430 v += 9; 431 } 432 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 433 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 434 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 435 idiag -= 9; 436 i2 -= 3; 437 } 438 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 439 } 440 } else { 441 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 442 } 443 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 451 { 452 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 453 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 454 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 455 const PetscScalar *b; 456 PetscErrorCode ierr; 457 PetscInt m = a->mbs,i,i2,nz,idx; 458 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 459 460 PetscFunctionBegin; 461 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 462 its = its*lits; 463 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 464 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 465 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 466 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 467 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 468 469 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 470 471 diag = a->diag; 472 idiag = a->idiag; 473 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 474 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 475 476 if (flag & SOR_ZERO_INITIAL_GUESS) { 477 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 478 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 479 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 480 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 481 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 482 i2 = 4; 483 idiag += 16; 484 for (i=1; i<m; i++) { 485 v = aa + 16*ai[i]; 486 vi = aj + ai[i]; 487 nz = diag[i] - ai[i]; 488 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 489 while (nz--) { 490 idx = 4*(*vi++); 491 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 492 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 493 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 494 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 495 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 496 v += 16; 497 } 498 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 499 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 500 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 501 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 502 idiag += 16; 503 i2 += 4; 504 } 505 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 507 } 508 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510 i2 = 0; 511 mdiag = a->idiag+16*a->mbs; 512 for (i=0; i<m; i++) { 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 515 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 516 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 517 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 518 mdiag += 16; 519 i2 += 4; 520 } 521 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 522 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 523 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 524 } 525 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 526 idiag = a->idiag+16*a->mbs - 16; 527 i2 = 4*m - 4; 528 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 529 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 530 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 531 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 532 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 533 idiag -= 16; 534 i2 -= 4; 535 for (i=m-2; i>=0; i--) { 536 v = aa + 16*(diag[i]+1); 537 vi = aj + diag[i] + 1; 538 nz = ai[i+1] - diag[i] - 1; 539 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 540 while (nz--) { 541 idx = 4*(*vi++); 542 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 543 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 544 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 545 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 546 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 547 v += 16; 548 } 549 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 550 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 551 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 552 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 553 idiag -= 16; 554 i2 -= 4; 555 } 556 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 569 { 570 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 571 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 572 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 573 const PetscScalar *b; 574 PetscErrorCode ierr; 575 PetscInt m = a->mbs,i,i2,nz,idx; 576 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 577 578 PetscFunctionBegin; 579 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 580 its = its*lits; 581 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 582 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 583 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 584 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 585 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 586 587 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 588 589 diag = a->diag; 590 idiag = a->idiag; 591 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 592 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 593 594 if (flag & SOR_ZERO_INITIAL_GUESS) { 595 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 596 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 597 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 598 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 599 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 600 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 601 i2 = 5; 602 idiag += 25; 603 for (i=1; i<m; i++) { 604 v = aa + 25*ai[i]; 605 vi = aj + ai[i]; 606 nz = diag[i] - ai[i]; 607 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 608 while (nz--) { 609 idx = 5*(*vi++); 610 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 611 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 612 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 613 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 614 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 615 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 616 v += 25; 617 } 618 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 619 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 620 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 621 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 622 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 623 idiag += 25; 624 i2 += 5; 625 } 626 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 627 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 628 } 629 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 630 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 631 i2 = 0; 632 mdiag = a->idiag+25*a->mbs; 633 for (i=0; i<m; i++) { 634 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 635 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 636 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 637 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 638 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 639 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 640 mdiag += 25; 641 i2 += 5; 642 } 643 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 644 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 645 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 646 } 647 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 648 idiag = a->idiag+25*a->mbs - 25; 649 i2 = 5*m - 5; 650 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 651 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 652 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 653 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 654 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 655 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 656 idiag -= 25; 657 i2 -= 5; 658 for (i=m-2; i>=0; i--) { 659 v = aa + 25*(diag[i]+1); 660 vi = aj + diag[i] + 1; 661 nz = ai[i+1] - diag[i] - 1; 662 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 663 while (nz--) { 664 idx = 5*(*vi++); 665 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 666 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 667 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 668 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 669 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 670 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 671 v += 25; 672 } 673 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 674 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 675 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 676 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 677 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 678 idiag -= 25; 679 i2 -= 5; 680 } 681 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 682 } 683 } else { 684 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 685 } 686 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 694 { 695 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 696 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 697 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 698 const PetscScalar *b; 699 PetscErrorCode ierr; 700 PetscInt m = a->mbs,i,i2,nz,idx; 701 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 702 703 PetscFunctionBegin; 704 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 705 its = its*lits; 706 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 707 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 708 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 709 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 710 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 711 712 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 713 714 diag = a->diag; 715 idiag = a->idiag; 716 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 717 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 718 719 if (flag & SOR_ZERO_INITIAL_GUESS) { 720 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 721 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 722 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 723 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 724 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 725 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 726 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 727 i2 = 6; 728 idiag += 36; 729 for (i=1; i<m; i++) { 730 v = aa + 36*ai[i]; 731 vi = aj + ai[i]; 732 nz = diag[i] - ai[i]; 733 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 734 while (nz--) { 735 idx = 6*(*vi++); 736 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 737 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 738 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 739 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 740 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 741 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 742 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 743 v += 36; 744 } 745 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 746 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 747 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 748 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 749 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 750 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 751 idiag += 36; 752 i2 += 6; 753 } 754 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 755 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 756 } 757 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 758 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 759 i2 = 0; 760 mdiag = a->idiag+36*a->mbs; 761 for (i=0; i<m; i++) { 762 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 763 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 764 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 765 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 766 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 767 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 768 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 769 mdiag += 36; 770 i2 += 6; 771 } 772 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 773 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 774 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 775 } 776 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 777 idiag = a->idiag+36*a->mbs - 36; 778 i2 = 6*m - 6; 779 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 780 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 781 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 782 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 783 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 784 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 785 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 786 idiag -= 36; 787 i2 -= 6; 788 for (i=m-2; i>=0; i--) { 789 v = aa + 36*(diag[i]+1); 790 vi = aj + diag[i] + 1; 791 nz = ai[i+1] - diag[i] - 1; 792 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 793 while (nz--) { 794 idx = 6*(*vi++); 795 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 796 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 797 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 798 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 799 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 800 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 801 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 802 v += 36; 803 } 804 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 805 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 806 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 807 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 808 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 809 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 810 idiag -= 36; 811 i2 -= 6; 812 } 813 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 814 } 815 } else { 816 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 817 } 818 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 819 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 826 { 827 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 828 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 829 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 830 const PetscScalar *b; 831 PetscErrorCode ierr; 832 PetscInt m = a->mbs,i,i2,nz,idx; 833 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 834 835 PetscFunctionBegin; 836 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 837 its = its*lits; 838 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 839 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 840 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 841 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 842 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 843 844 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 845 846 diag = a->diag; 847 idiag = a->idiag; 848 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 849 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 850 851 if (flag & SOR_ZERO_INITIAL_GUESS) { 852 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 853 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 854 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 855 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 856 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 857 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 858 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 859 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 860 i2 = 7; 861 idiag += 49; 862 for (i=1; i<m; i++) { 863 v = aa + 49*ai[i]; 864 vi = aj + ai[i]; 865 nz = diag[i] - ai[i]; 866 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 867 while (nz--) { 868 idx = 7*(*vi++); 869 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 870 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 871 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 872 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 873 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 874 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 875 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 876 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 877 v += 49; 878 } 879 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 880 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 881 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 882 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 883 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 884 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 885 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 886 idiag += 49; 887 i2 += 7; 888 } 889 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 890 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 891 } 892 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 893 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 894 i2 = 0; 895 mdiag = a->idiag+49*a->mbs; 896 for (i=0; i<m; i++) { 897 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 898 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 899 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 900 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 901 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 902 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 903 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 904 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 905 mdiag += 36; 906 i2 += 6; 907 } 908 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 909 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 910 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 911 } 912 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 913 idiag = a->idiag+49*a->mbs - 49; 914 i2 = 7*m - 7; 915 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 916 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 917 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 918 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 919 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 920 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 921 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 922 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 923 idiag -= 49; 924 i2 -= 7; 925 for (i=m-2; i>=0; i--) { 926 v = aa + 49*(diag[i]+1); 927 vi = aj + diag[i] + 1; 928 nz = ai[i+1] - diag[i] - 1; 929 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 930 while (nz--) { 931 idx = 7*(*vi++); 932 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 933 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 934 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 935 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 936 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 937 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 938 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 939 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 940 v += 49; 941 } 942 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 943 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 944 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 945 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 946 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 947 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 948 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 949 idiag -= 49; 950 i2 -= 7; 951 } 952 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 953 } 954 } else { 955 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 956 } 957 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 958 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 /* 963 Special version for direct calls from Fortran (Used in PETSc-fun3d) 964 */ 965 #if defined(PETSC_HAVE_FORTRAN_CAPS) 966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 968 #define matsetvaluesblocked4_ matsetvaluesblocked4 969 #endif 970 971 EXTERN_C_BEGIN 972 #undef __FUNCT__ 973 #define __FUNCT__ "matsetvaluesblocked4_" 974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 975 { 976 Mat A = *AA; 977 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 978 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 979 PetscInt *ai=a->i,*ailen=a->ilen; 980 PetscInt *aj=a->j,stepval,lastcol = -1; 981 const PetscScalar *value = v; 982 MatScalar *ap,*aa = a->a,*bap; 983 984 PetscFunctionBegin; 985 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 986 stepval = (n-1)*4; 987 for (k=0; k<m; k++) { /* loop over added rows */ 988 row = im[k]; 989 rp = aj + ai[row]; 990 ap = aa + 16*ai[row]; 991 nrow = ailen[row]; 992 low = 0; 993 high = nrow; 994 for (l=0; l<n; l++) { /* loop over added columns */ 995 col = in[l]; 996 if (col <= lastcol) low = 0; else high = nrow; 997 lastcol = col; 998 value = v + k*(stepval+4 + l)*4; 999 while (high-low > 7) { 1000 t = (low+high)/2; 1001 if (rp[t] > col) high = t; 1002 else low = t; 1003 } 1004 for (i=low; i<high; i++) { 1005 if (rp[i] > col) break; 1006 if (rp[i] == col) { 1007 bap = ap + 16*i; 1008 for (ii=0; ii<4; ii++,value+=stepval) { 1009 for (jj=ii; jj<16; jj+=4) { 1010 bap[jj] += *value++; 1011 } 1012 } 1013 goto noinsert2; 1014 } 1015 } 1016 N = nrow++ - 1; 1017 high++; /* added new column index thus must search to one higher than before */ 1018 /* shift up all the later entries in this row */ 1019 for (ii=N; ii>=i; ii--) { 1020 rp[ii+1] = rp[ii]; 1021 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1022 } 1023 if (N >= i) { 1024 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1025 } 1026 rp[i] = col; 1027 bap = ap + 16*i; 1028 for (ii=0; ii<4; ii++,value+=stepval) { 1029 for (jj=ii; jj<16; jj+=4) { 1030 bap[jj] = *value++; 1031 } 1032 } 1033 noinsert2:; 1034 low = i; 1035 } 1036 ailen[row] = nrow; 1037 } 1038 PetscFunctionReturnVoid(); 1039 } 1040 EXTERN_C_END 1041 1042 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1043 #define matsetvalues4_ MATSETVALUES4 1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1045 #define matsetvalues4_ matsetvalues4 1046 #endif 1047 1048 EXTERN_C_BEGIN 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatSetValues4_" 1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1052 { 1053 Mat A = *AA; 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1056 PetscInt *ai=a->i,*ailen=a->ilen; 1057 PetscInt *aj=a->j,brow,bcol; 1058 PetscInt ridx,cidx,lastcol = -1; 1059 MatScalar *ap,value,*aa=a->a,*bap; 1060 1061 PetscFunctionBegin; 1062 for (k=0; k<m; k++) { /* loop over added rows */ 1063 row = im[k]; brow = row/4; 1064 rp = aj + ai[brow]; 1065 ap = aa + 16*ai[brow]; 1066 nrow = ailen[brow]; 1067 low = 0; 1068 high = nrow; 1069 for (l=0; l<n; l++) { /* loop over added columns */ 1070 col = in[l]; bcol = col/4; 1071 ridx = row % 4; cidx = col % 4; 1072 value = v[l + k*n]; 1073 if (col <= lastcol) low = 0; else high = nrow; 1074 lastcol = col; 1075 while (high-low > 7) { 1076 t = (low+high)/2; 1077 if (rp[t] > bcol) high = t; 1078 else low = t; 1079 } 1080 for (i=low; i<high; i++) { 1081 if (rp[i] > bcol) break; 1082 if (rp[i] == bcol) { 1083 bap = ap + 16*i + 4*cidx + ridx; 1084 *bap += value; 1085 goto noinsert1; 1086 } 1087 } 1088 N = nrow++ - 1; 1089 high++; /* added new column thus must search to one higher than before */ 1090 /* shift up all the later entries in this row */ 1091 for (ii=N; ii>=i; ii--) { 1092 rp[ii+1] = rp[ii]; 1093 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1094 } 1095 if (N>=i) { 1096 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1097 } 1098 rp[i] = bcol; 1099 ap[16*i + 4*cidx + ridx] = value; 1100 noinsert1:; 1101 low = i; 1102 } 1103 ailen[brow] = nrow; 1104 } 1105 PetscFunctionReturnVoid(); 1106 } 1107 EXTERN_C_END 1108 1109 /* 1110 Checks for missing diagonals 1111 */ 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1117 PetscErrorCode ierr; 1118 PetscInt *diag,*jj = a->j,i; 1119 1120 PetscFunctionBegin; 1121 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1122 *missing = PETSC_FALSE; 1123 if (A->rmap->n > 0 && !jj) { 1124 *missing = PETSC_TRUE; 1125 if (d) *d = 0; 1126 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1127 } else { 1128 diag = a->diag; 1129 for (i=0; i<a->mbs; i++) { 1130 if (jj[diag[i]] != i) { 1131 *missing = PETSC_TRUE; 1132 if (d) *d = i; 1133 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1134 } 1135 } 1136 } 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1143 { 1144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1145 PetscErrorCode ierr; 1146 PetscInt i,j,m = a->mbs; 1147 1148 PetscFunctionBegin; 1149 if (!a->diag) { 1150 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1151 } 1152 for (i=0; i<m; i++) { 1153 a->diag[i] = a->i[i+1]; 1154 for (j=a->i[i]; j<a->i[i+1]; j++) { 1155 if (a->j[j] == i) { 1156 a->diag[i] = j; 1157 break; 1158 } 1159 } 1160 } 1161 PetscFunctionReturn(0); 1162 } 1163 1164 1165 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1169 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1170 { 1171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1172 PetscErrorCode ierr; 1173 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1174 PetscInt *tia, *tja; 1175 1176 PetscFunctionBegin; 1177 *nn = n; 1178 if (!ia) PetscFunctionReturn(0); 1179 if (symmetric) { 1180 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1181 } else { 1182 tia = a->i; tja = a->j; 1183 } 1184 1185 if (!blockcompressed && bs > 1) { 1186 (*nn) *= bs; 1187 nbs = bs; 1188 /* malloc & create the natural set of indices */ 1189 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1190 if (n) { 1191 (*ia)[0] = 0; 1192 for (j=1; j<bs; j++) { 1193 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1194 } 1195 } 1196 1197 for (i=1; i<n; i++) { 1198 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1199 for (j=1; j<bs; j++) { 1200 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1201 } 1202 } 1203 if (n) { 1204 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1205 } 1206 1207 if (ja) { 1208 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1209 cnt = 0; 1210 for (i=0; i<n; i++) { 1211 for (j=0; j<bs; j++) { 1212 for (k=tia[i]; k<tia[i+1]; k++) { 1213 for (l=0; l<bs; l++) { 1214 (*ja)[cnt++] = bs*tja[k] + l; 1215 } 1216 } 1217 } 1218 } 1219 } 1220 1221 n *= bs; 1222 nz *= bs*bs; 1223 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1224 ierr = PetscFree(tia);CHKERRQ(ierr); 1225 ierr = PetscFree(tja);CHKERRQ(ierr); 1226 } 1227 } else if (oshift == 1) { 1228 PetscInt nz = a->i[A->rmap->n/bs]; 1229 /* malloc space and add 1 to i and j indices */ 1230 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1231 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1232 if (ja) { 1233 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1234 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1235 } 1236 } else { 1237 *ia = tia; 1238 if (ja) *ja = tja; 1239 } 1240 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1246 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1247 { 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 if (!ia) PetscFunctionReturn(0); 1252 if (!blockcompressed && A->rmap->bs > 1) { 1253 ierr = PetscFree(*ia);CHKERRQ(ierr); 1254 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1255 } else if (symmetric || oshift == 1) { 1256 ierr = PetscFree(*ia);CHKERRQ(ierr); 1257 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1264 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1265 { 1266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 #if defined(PETSC_USE_LOG) 1271 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1272 #endif 1273 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1274 if (a->row) { 1275 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1276 } 1277 if (a->col) { 1278 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1279 } 1280 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1281 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1282 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1283 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1284 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1285 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1286 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1287 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1288 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1289 1290 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1291 ierr = PetscFree(a);CHKERRQ(ierr); 1292 1293 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1307 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 switch (op) { 1314 case MAT_ROW_ORIENTED: 1315 a->roworiented = flg; 1316 break; 1317 case MAT_KEEP_NONZERO_PATTERN: 1318 a->keepnonzeropattern = flg; 1319 break; 1320 case MAT_NEW_NONZERO_LOCATIONS: 1321 a->nonew = (flg ? 0 : 1); 1322 break; 1323 case MAT_NEW_NONZERO_LOCATION_ERR: 1324 a->nonew = (flg ? -1 : 0); 1325 break; 1326 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1327 a->nonew = (flg ? -2 : 0); 1328 break; 1329 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1330 a->nounused = (flg ? -1 : 0); 1331 break; 1332 case MAT_NEW_DIAGONALS: 1333 case MAT_IGNORE_OFF_PROC_ENTRIES: 1334 case MAT_USE_HASH_TABLE: 1335 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1336 break; 1337 case MAT_SYMMETRIC: 1338 case MAT_STRUCTURALLY_SYMMETRIC: 1339 case MAT_HERMITIAN: 1340 case MAT_SYMMETRY_ETERNAL: 1341 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1342 break; 1343 default: 1344 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1345 } 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1351 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1352 { 1353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1354 PetscErrorCode ierr; 1355 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1356 MatScalar *aa,*aa_i; 1357 PetscScalar *v_i; 1358 1359 PetscFunctionBegin; 1360 bs = A->rmap->bs; 1361 ai = a->i; 1362 aj = a->j; 1363 aa = a->a; 1364 bs2 = a->bs2; 1365 1366 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1367 1368 bn = row/bs; /* Block number */ 1369 bp = row % bs; /* Block Position */ 1370 M = ai[bn+1] - ai[bn]; 1371 *nz = bs*M; 1372 1373 if (v) { 1374 *v = 0; 1375 if (*nz) { 1376 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1377 for (i=0; i<M; i++) { /* for each block in the block row */ 1378 v_i = *v + i*bs; 1379 aa_i = aa + bs2*(ai[bn] + i); 1380 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1381 } 1382 } 1383 } 1384 1385 if (idx) { 1386 *idx = 0; 1387 if (*nz) { 1388 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1389 for (i=0; i<M; i++) { /* for each block in the block row */ 1390 idx_i = *idx + i*bs; 1391 itmp = bs*aj[ai[bn] + i]; 1392 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1393 } 1394 } 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1401 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1402 { 1403 PetscErrorCode ierr; 1404 1405 PetscFunctionBegin; 1406 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1407 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1413 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1414 { 1415 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1416 Mat C; 1417 PetscErrorCode ierr; 1418 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1419 PetscInt *rows,*cols,bs2=a->bs2; 1420 MatScalar *array; 1421 1422 PetscFunctionBegin; 1423 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1424 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1425 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1426 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1427 1428 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1429 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1430 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1431 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1432 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1433 ierr = PetscFree(col);CHKERRQ(ierr); 1434 } else { 1435 C = *B; 1436 } 1437 1438 array = a->a; 1439 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1440 cols = rows + bs; 1441 for (i=0; i<mbs; i++) { 1442 cols[0] = i*bs; 1443 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1444 len = ai[i+1] - ai[i]; 1445 for (j=0; j<len; j++) { 1446 rows[0] = (*aj++)*bs; 1447 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1448 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1449 array += bs2; 1450 } 1451 } 1452 ierr = PetscFree(rows);CHKERRQ(ierr); 1453 1454 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1456 1457 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1458 *B = C; 1459 } else { 1460 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1461 } 1462 PetscFunctionReturn(0); 1463 } 1464 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1467 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1468 { 1469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1470 PetscErrorCode ierr; 1471 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1472 int fd; 1473 PetscScalar *aa; 1474 FILE *file; 1475 1476 PetscFunctionBegin; 1477 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1478 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1479 col_lens[0] = MAT_FILE_COOKIE; 1480 1481 col_lens[1] = A->rmap->N; 1482 col_lens[2] = A->cmap->n; 1483 col_lens[3] = a->nz*bs2; 1484 1485 /* store lengths of each row and write (including header) to file */ 1486 count = 0; 1487 for (i=0; i<a->mbs; i++) { 1488 for (j=0; j<bs; j++) { 1489 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1490 } 1491 } 1492 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1493 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1494 1495 /* store column indices (zero start index) */ 1496 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1497 count = 0; 1498 for (i=0; i<a->mbs; i++) { 1499 for (j=0; j<bs; j++) { 1500 for (k=a->i[i]; k<a->i[i+1]; k++) { 1501 for (l=0; l<bs; l++) { 1502 jj[count++] = bs*a->j[k] + l; 1503 } 1504 } 1505 } 1506 } 1507 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1508 ierr = PetscFree(jj);CHKERRQ(ierr); 1509 1510 /* store nonzero values */ 1511 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1512 count = 0; 1513 for (i=0; i<a->mbs; i++) { 1514 for (j=0; j<bs; j++) { 1515 for (k=a->i[i]; k<a->i[i+1]; k++) { 1516 for (l=0; l<bs; l++) { 1517 aa[count++] = a->a[bs2*k + l*bs + j]; 1518 } 1519 } 1520 } 1521 } 1522 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1523 ierr = PetscFree(aa);CHKERRQ(ierr); 1524 1525 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1526 if (file) { 1527 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1534 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1535 { 1536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1537 PetscErrorCode ierr; 1538 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1539 PetscViewerFormat format; 1540 1541 PetscFunctionBegin; 1542 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1543 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1544 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1545 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1546 Mat aij; 1547 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1548 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1549 ierr = MatDestroy(aij);CHKERRQ(ierr); 1550 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1551 PetscFunctionReturn(0); 1552 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1553 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1554 for (i=0; i<a->mbs; i++) { 1555 for (j=0; j<bs; j++) { 1556 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1557 for (k=a->i[i]; k<a->i[i+1]; k++) { 1558 for (l=0; l<bs; l++) { 1559 #if defined(PETSC_USE_COMPLEX) 1560 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1562 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1563 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1565 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1566 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } 1569 #else 1570 if (a->a[bs2*k + l*bs + j] != 0.0) { 1571 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1572 } 1573 #endif 1574 } 1575 } 1576 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1577 } 1578 } 1579 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1580 } else { 1581 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1582 for (i=0; i<a->mbs; i++) { 1583 for (j=0; j<bs; j++) { 1584 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1585 for (k=a->i[i]; k<a->i[i+1]; k++) { 1586 for (l=0; l<bs; l++) { 1587 #if defined(PETSC_USE_COMPLEX) 1588 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1589 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1590 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1591 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1592 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1593 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1594 } else { 1595 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1596 } 1597 #else 1598 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1599 #endif 1600 } 1601 } 1602 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1603 } 1604 } 1605 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1606 } 1607 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1608 PetscFunctionReturn(0); 1609 } 1610 1611 #undef __FUNCT__ 1612 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1613 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1614 { 1615 Mat A = (Mat) Aa; 1616 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1617 PetscErrorCode ierr; 1618 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1619 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1620 MatScalar *aa; 1621 PetscViewer viewer; 1622 1623 PetscFunctionBegin; 1624 1625 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1626 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1627 1628 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1629 1630 /* loop over matrix elements drawing boxes */ 1631 color = PETSC_DRAW_BLUE; 1632 for (i=0,row=0; i<mbs; i++,row+=bs) { 1633 for (j=a->i[i]; j<a->i[i+1]; j++) { 1634 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1635 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1636 aa = a->a + j*bs2; 1637 for (k=0; k<bs; k++) { 1638 for (l=0; l<bs; l++) { 1639 if (PetscRealPart(*aa++) >= 0.) continue; 1640 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1641 } 1642 } 1643 } 1644 } 1645 color = PETSC_DRAW_CYAN; 1646 for (i=0,row=0; i<mbs; i++,row+=bs) { 1647 for (j=a->i[i]; j<a->i[i+1]; j++) { 1648 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1649 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1650 aa = a->a + j*bs2; 1651 for (k=0; k<bs; k++) { 1652 for (l=0; l<bs; l++) { 1653 if (PetscRealPart(*aa++) != 0.) continue; 1654 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1655 } 1656 } 1657 } 1658 } 1659 1660 color = PETSC_DRAW_RED; 1661 for (i=0,row=0; i<mbs; i++,row+=bs) { 1662 for (j=a->i[i]; j<a->i[i+1]; j++) { 1663 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1664 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1665 aa = a->a + j*bs2; 1666 for (k=0; k<bs; k++) { 1667 for (l=0; l<bs; l++) { 1668 if (PetscRealPart(*aa++) <= 0.) continue; 1669 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1670 } 1671 } 1672 } 1673 } 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1679 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1680 { 1681 PetscErrorCode ierr; 1682 PetscReal xl,yl,xr,yr,w,h; 1683 PetscDraw draw; 1684 PetscTruth isnull; 1685 1686 PetscFunctionBegin; 1687 1688 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1689 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1690 1691 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1692 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1693 xr += w; yr += h; xl = -w; yl = -h; 1694 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1695 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1696 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatView_SeqBAIJ" 1702 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1703 { 1704 PetscErrorCode ierr; 1705 PetscTruth iascii,isbinary,isdraw; 1706 1707 PetscFunctionBegin; 1708 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1709 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1710 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1711 if (iascii){ 1712 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1713 } else if (isbinary) { 1714 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1715 } else if (isdraw) { 1716 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1717 } else { 1718 Mat B; 1719 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1720 ierr = MatView(B,viewer);CHKERRQ(ierr); 1721 ierr = MatDestroy(B);CHKERRQ(ierr); 1722 } 1723 PetscFunctionReturn(0); 1724 } 1725 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1729 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1730 { 1731 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1732 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1733 PetscInt *ai = a->i,*ailen = a->ilen; 1734 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1735 MatScalar *ap,*aa = a->a; 1736 1737 PetscFunctionBegin; 1738 for (k=0; k<m; k++) { /* loop over rows */ 1739 row = im[k]; brow = row/bs; 1740 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1741 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1742 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1743 nrow = ailen[brow]; 1744 for (l=0; l<n; l++) { /* loop over columns */ 1745 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1746 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1747 col = in[l] ; 1748 bcol = col/bs; 1749 cidx = col%bs; 1750 ridx = row%bs; 1751 high = nrow; 1752 low = 0; /* assume unsorted */ 1753 while (high-low > 5) { 1754 t = (low+high)/2; 1755 if (rp[t] > bcol) high = t; 1756 else low = t; 1757 } 1758 for (i=low; i<high; i++) { 1759 if (rp[i] > bcol) break; 1760 if (rp[i] == bcol) { 1761 *v++ = ap[bs2*i+bs*cidx+ridx]; 1762 goto finished; 1763 } 1764 } 1765 *v++ = 0.0; 1766 finished:; 1767 } 1768 } 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #define CHUNKSIZE 10 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1775 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1776 { 1777 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1778 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1779 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1780 PetscErrorCode ierr; 1781 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1782 PetscTruth roworiented=a->roworiented; 1783 const PetscScalar *value = v; 1784 MatScalar *ap,*aa = a->a,*bap; 1785 1786 PetscFunctionBegin; 1787 if (roworiented) { 1788 stepval = (n-1)*bs; 1789 } else { 1790 stepval = (m-1)*bs; 1791 } 1792 for (k=0; k<m; k++) { /* loop over added rows */ 1793 row = im[k]; 1794 if (row < 0) continue; 1795 #if defined(PETSC_USE_DEBUG) 1796 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1797 #endif 1798 rp = aj + ai[row]; 1799 ap = aa + bs2*ai[row]; 1800 rmax = imax[row]; 1801 nrow = ailen[row]; 1802 low = 0; 1803 high = nrow; 1804 for (l=0; l<n; l++) { /* loop over added columns */ 1805 if (in[l] < 0) continue; 1806 #if defined(PETSC_USE_DEBUG) 1807 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1808 #endif 1809 col = in[l]; 1810 if (roworiented) { 1811 value = v + k*(stepval+bs)*bs + l*bs; 1812 } else { 1813 value = v + l*(stepval+bs)*bs + k*bs; 1814 } 1815 if (col <= lastcol) low = 0; else high = nrow; 1816 lastcol = col; 1817 while (high-low > 7) { 1818 t = (low+high)/2; 1819 if (rp[t] > col) high = t; 1820 else low = t; 1821 } 1822 for (i=low; i<high; i++) { 1823 if (rp[i] > col) break; 1824 if (rp[i] == col) { 1825 bap = ap + bs2*i; 1826 if (roworiented) { 1827 if (is == ADD_VALUES) { 1828 for (ii=0; ii<bs; ii++,value+=stepval) { 1829 for (jj=ii; jj<bs2; jj+=bs) { 1830 bap[jj] += *value++; 1831 } 1832 } 1833 } else { 1834 for (ii=0; ii<bs; ii++,value+=stepval) { 1835 for (jj=ii; jj<bs2; jj+=bs) { 1836 bap[jj] = *value++; 1837 } 1838 } 1839 } 1840 } else { 1841 if (is == ADD_VALUES) { 1842 for (ii=0; ii<bs; ii++,value+=stepval) { 1843 for (jj=0; jj<bs; jj++) { 1844 *bap++ += *value++; 1845 } 1846 } 1847 } else { 1848 for (ii=0; ii<bs; ii++,value+=stepval) { 1849 for (jj=0; jj<bs; jj++) { 1850 *bap++ = *value++; 1851 } 1852 } 1853 } 1854 } 1855 goto noinsert2; 1856 } 1857 } 1858 if (nonew == 1) goto noinsert2; 1859 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1860 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1861 N = nrow++ - 1; high++; 1862 /* shift up all the later entries in this row */ 1863 for (ii=N; ii>=i; ii--) { 1864 rp[ii+1] = rp[ii]; 1865 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1866 } 1867 if (N >= i) { 1868 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1869 } 1870 rp[i] = col; 1871 bap = ap + bs2*i; 1872 if (roworiented) { 1873 for (ii=0; ii<bs; ii++,value+=stepval) { 1874 for (jj=ii; jj<bs2; jj+=bs) { 1875 bap[jj] = *value++; 1876 } 1877 } 1878 } else { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=0; jj<bs; jj++) { 1881 *bap++ = *value++; 1882 } 1883 } 1884 } 1885 noinsert2:; 1886 low = i; 1887 } 1888 ailen[row] = nrow; 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1895 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1896 { 1897 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1898 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1899 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1900 PetscErrorCode ierr; 1901 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1902 MatScalar *aa = a->a,*ap; 1903 PetscReal ratio=0.6; 1904 1905 PetscFunctionBegin; 1906 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1907 1908 if (m) rmax = ailen[0]; 1909 for (i=1; i<mbs; i++) { 1910 /* move each row back by the amount of empty slots (fshift) before it*/ 1911 fshift += imax[i-1] - ailen[i-1]; 1912 rmax = PetscMax(rmax,ailen[i]); 1913 if (fshift) { 1914 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1915 N = ailen[i]; 1916 for (j=0; j<N; j++) { 1917 ip[j-fshift] = ip[j]; 1918 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1919 } 1920 } 1921 ai[i] = ai[i-1] + ailen[i-1]; 1922 } 1923 if (mbs) { 1924 fshift += imax[mbs-1] - ailen[mbs-1]; 1925 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1926 } 1927 /* reset ilen and imax for each row */ 1928 for (i=0; i<mbs; i++) { 1929 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1930 } 1931 a->nz = ai[mbs]; 1932 1933 /* diagonals may have moved, so kill the diagonal pointers */ 1934 a->idiagvalid = PETSC_FALSE; 1935 if (fshift && a->diag) { 1936 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1937 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1938 a->diag = 0; 1939 } 1940 if (fshift && a->nounused == -1) { 1941 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1942 } 1943 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1944 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1945 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1946 a->reallocs = 0; 1947 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1948 1949 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1950 if (a->compressedrow.use){ 1951 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1952 } 1953 1954 A->same_nonzero = PETSC_TRUE; 1955 PetscFunctionReturn(0); 1956 } 1957 1958 /* 1959 This function returns an array of flags which indicate the locations of contiguous 1960 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1961 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1962 Assume: sizes should be long enough to hold all the values. 1963 */ 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1966 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1967 { 1968 PetscInt i,j,k,row; 1969 PetscTruth flg; 1970 1971 PetscFunctionBegin; 1972 for (i=0,j=0; i<n; j++) { 1973 row = idx[i]; 1974 if (row%bs!=0) { /* Not the begining of a block */ 1975 sizes[j] = 1; 1976 i++; 1977 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1978 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1979 i++; 1980 } else { /* Begining of the block, so check if the complete block exists */ 1981 flg = PETSC_TRUE; 1982 for (k=1; k<bs; k++) { 1983 if (row+k != idx[i+k]) { /* break in the block */ 1984 flg = PETSC_FALSE; 1985 break; 1986 } 1987 } 1988 if (flg) { /* No break in the bs */ 1989 sizes[j] = bs; 1990 i+= bs; 1991 } else { 1992 sizes[j] = 1; 1993 i++; 1994 } 1995 } 1996 } 1997 *bs_max = j; 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2003 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2004 { 2005 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2006 PetscErrorCode ierr; 2007 PetscInt i,j,k,count,*rows; 2008 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2009 PetscScalar zero = 0.0; 2010 MatScalar *aa; 2011 2012 PetscFunctionBegin; 2013 /* Make a copy of the IS and sort it */ 2014 /* allocate memory for rows,sizes */ 2015 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2016 sizes = rows + is_n; 2017 2018 /* copy IS values to rows, and sort them */ 2019 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2020 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2021 if (baij->keepnonzeropattern) { 2022 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2023 bs_max = is_n; 2024 A->same_nonzero = PETSC_TRUE; 2025 } else { 2026 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2027 A->same_nonzero = PETSC_FALSE; 2028 } 2029 2030 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2031 row = rows[j]; 2032 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2033 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2034 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2035 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2036 if (diag != 0.0) { 2037 if (baij->ilen[row/bs] > 0) { 2038 baij->ilen[row/bs] = 1; 2039 baij->j[baij->i[row/bs]] = row/bs; 2040 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2041 } 2042 /* Now insert all the diagonal values for this bs */ 2043 for (k=0; k<bs; k++) { 2044 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2045 } 2046 } else { /* (diag == 0.0) */ 2047 baij->ilen[row/bs] = 0; 2048 } /* end (diag == 0.0) */ 2049 } else { /* (sizes[i] != bs) */ 2050 #if defined (PETSC_USE_DEBUG) 2051 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2052 #endif 2053 for (k=0; k<count; k++) { 2054 aa[0] = zero; 2055 aa += bs; 2056 } 2057 if (diag != 0.0) { 2058 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2059 } 2060 } 2061 } 2062 2063 ierr = PetscFree(rows);CHKERRQ(ierr); 2064 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2070 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2071 { 2072 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2073 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2074 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2075 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2076 PetscErrorCode ierr; 2077 PetscInt ridx,cidx,bs2=a->bs2; 2078 PetscTruth roworiented=a->roworiented; 2079 MatScalar *ap,value,*aa=a->a,*bap; 2080 2081 PetscFunctionBegin; 2082 for (k=0; k<m; k++) { /* loop over added rows */ 2083 row = im[k]; 2084 brow = row/bs; 2085 if (row < 0) continue; 2086 #if defined(PETSC_USE_DEBUG) 2087 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2088 #endif 2089 rp = aj + ai[brow]; 2090 ap = aa + bs2*ai[brow]; 2091 rmax = imax[brow]; 2092 nrow = ailen[brow]; 2093 low = 0; 2094 high = nrow; 2095 for (l=0; l<n; l++) { /* loop over added columns */ 2096 if (in[l] < 0) continue; 2097 #if defined(PETSC_USE_DEBUG) 2098 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2099 #endif 2100 col = in[l]; bcol = col/bs; 2101 ridx = row % bs; cidx = col % bs; 2102 if (roworiented) { 2103 value = v[l + k*n]; 2104 } else { 2105 value = v[k + l*m]; 2106 } 2107 if (col <= lastcol) low = 0; else high = nrow; 2108 lastcol = col; 2109 while (high-low > 7) { 2110 t = (low+high)/2; 2111 if (rp[t] > bcol) high = t; 2112 else low = t; 2113 } 2114 for (i=low; i<high; i++) { 2115 if (rp[i] > bcol) break; 2116 if (rp[i] == bcol) { 2117 bap = ap + bs2*i + bs*cidx + ridx; 2118 if (is == ADD_VALUES) *bap += value; 2119 else *bap = value; 2120 goto noinsert1; 2121 } 2122 } 2123 if (nonew == 1) goto noinsert1; 2124 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2125 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2126 N = nrow++ - 1; high++; 2127 /* shift up all the later entries in this row */ 2128 for (ii=N; ii>=i; ii--) { 2129 rp[ii+1] = rp[ii]; 2130 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2131 } 2132 if (N>=i) { 2133 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2134 } 2135 rp[i] = bcol; 2136 ap[bs2*i + bs*cidx + ridx] = value; 2137 a->nz++; 2138 noinsert1:; 2139 low = i; 2140 } 2141 ailen[brow] = nrow; 2142 } 2143 A->same_nonzero = PETSC_FALSE; 2144 PetscFunctionReturn(0); 2145 } 2146 2147 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2148 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2151 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2152 { 2153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2154 Mat outA; 2155 PetscErrorCode ierr; 2156 PetscTruth row_identity,col_identity; 2157 2158 PetscFunctionBegin; 2159 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2160 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2161 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2162 if (!row_identity || !col_identity) { 2163 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2164 } 2165 2166 outA = inA; 2167 inA->factor = MAT_FACTOR_LU; 2168 2169 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2170 2171 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2172 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2173 a->row = row; 2174 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2175 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2176 a->col = col; 2177 2178 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2179 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2180 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2181 2182 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2183 if (!a->solve_work) { 2184 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2185 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2186 } 2187 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2188 2189 PetscFunctionReturn(0); 2190 } 2191 2192 EXTERN_C_BEGIN 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2195 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2196 { 2197 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2198 PetscInt i,nz,mbs; 2199 2200 PetscFunctionBegin; 2201 nz = baij->maxnz/baij->bs2; 2202 mbs = baij->mbs; 2203 for (i=0; i<nz; i++) { 2204 baij->j[i] = indices[i]; 2205 } 2206 baij->nz = nz; 2207 for (i=0; i<mbs; i++) { 2208 baij->ilen[i] = baij->imax[i]; 2209 } 2210 PetscFunctionReturn(0); 2211 } 2212 EXTERN_C_END 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2216 /*@ 2217 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2218 in the matrix. 2219 2220 Input Parameters: 2221 + mat - the SeqBAIJ matrix 2222 - indices - the column indices 2223 2224 Level: advanced 2225 2226 Notes: 2227 This can be called if you have precomputed the nonzero structure of the 2228 matrix and want to provide it to the matrix object to improve the performance 2229 of the MatSetValues() operation. 2230 2231 You MUST have set the correct numbers of nonzeros per row in the call to 2232 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2233 2234 MUST be called before any calls to MatSetValues(); 2235 2236 @*/ 2237 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2238 { 2239 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2240 2241 PetscFunctionBegin; 2242 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2243 PetscValidPointer(indices,2); 2244 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2245 if (f) { 2246 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2247 } else { 2248 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2249 } 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2255 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2256 { 2257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2258 PetscErrorCode ierr; 2259 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2260 PetscReal atmp; 2261 PetscScalar *x,zero = 0.0; 2262 MatScalar *aa; 2263 PetscInt ncols,brow,krow,kcol; 2264 2265 PetscFunctionBegin; 2266 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2267 bs = A->rmap->bs; 2268 aa = a->a; 2269 ai = a->i; 2270 aj = a->j; 2271 mbs = a->mbs; 2272 2273 ierr = VecSet(v,zero);CHKERRQ(ierr); 2274 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2275 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2276 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2277 for (i=0; i<mbs; i++) { 2278 ncols = ai[1] - ai[0]; ai++; 2279 brow = bs*i; 2280 for (j=0; j<ncols; j++){ 2281 for (kcol=0; kcol<bs; kcol++){ 2282 for (krow=0; krow<bs; krow++){ 2283 atmp = PetscAbsScalar(*aa);aa++; 2284 row = brow + krow; /* row index */ 2285 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2286 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2287 } 2288 } 2289 aj++; 2290 } 2291 } 2292 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2293 PetscFunctionReturn(0); 2294 } 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatCopy_SeqBAIJ" 2298 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2299 { 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 /* If the two matrices have the same copy implementation, use fast copy. */ 2304 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2305 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2306 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2307 2308 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2309 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2310 } 2311 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2312 } else { 2313 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2314 } 2315 PetscFunctionReturn(0); 2316 } 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2320 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2321 { 2322 PetscErrorCode ierr; 2323 2324 PetscFunctionBegin; 2325 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2331 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2332 { 2333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2334 PetscFunctionBegin; 2335 *array = a->a; 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2341 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2342 { 2343 PetscFunctionBegin; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #include "petscblaslapack.h" 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2350 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2351 { 2352 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2353 PetscErrorCode ierr; 2354 PetscInt i,bs=Y->rmap->bs,j,bs2; 2355 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2356 2357 PetscFunctionBegin; 2358 if (str == SAME_NONZERO_PATTERN) { 2359 PetscScalar alpha = a; 2360 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2361 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2362 if (y->xtoy && y->XtoY != X) { 2363 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2364 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2365 } 2366 if (!y->xtoy) { /* get xtoy */ 2367 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2368 y->XtoY = X; 2369 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2370 } 2371 bs2 = bs*bs; 2372 for (i=0; i<x->nz; i++) { 2373 j = 0; 2374 while (j < bs2){ 2375 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2376 j++; 2377 } 2378 } 2379 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2380 } else { 2381 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2382 } 2383 PetscFunctionReturn(0); 2384 } 2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2388 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2389 { 2390 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2391 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2392 MatScalar *aa = a->a; 2393 2394 PetscFunctionBegin; 2395 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2401 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2402 { 2403 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2404 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2405 MatScalar *aa = a->a; 2406 2407 PetscFunctionBegin; 2408 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2413 2414 #undef __FUNCT__ 2415 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2416 /* 2417 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2418 */ 2419 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2420 { 2421 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2422 PetscErrorCode ierr; 2423 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2424 PetscInt nz = a->i[m],row,*jj,mr,col; 2425 2426 PetscFunctionBegin; 2427 *nn = n; 2428 if (!ia) PetscFunctionReturn(0); 2429 if (symmetric) { 2430 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2431 } else { 2432 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2433 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2434 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2435 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2436 jj = a->j; 2437 for (i=0; i<nz; i++) { 2438 collengths[jj[i]]++; 2439 } 2440 cia[0] = oshift; 2441 for (i=0; i<n; i++) { 2442 cia[i+1] = cia[i] + collengths[i]; 2443 } 2444 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2445 jj = a->j; 2446 for (row=0; row<m; row++) { 2447 mr = a->i[row+1] - a->i[row]; 2448 for (i=0; i<mr; i++) { 2449 col = *jj++; 2450 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2451 } 2452 } 2453 ierr = PetscFree(collengths);CHKERRQ(ierr); 2454 *ia = cia; *ja = cja; 2455 } 2456 PetscFunctionReturn(0); 2457 } 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2461 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2462 { 2463 PetscErrorCode ierr; 2464 2465 PetscFunctionBegin; 2466 if (!ia) PetscFunctionReturn(0); 2467 ierr = PetscFree(*ia);CHKERRQ(ierr); 2468 ierr = PetscFree(*ja);CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2474 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2475 { 2476 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2477 PetscErrorCode ierr; 2478 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2479 PetscScalar dx,*y,*xx,*w3_array; 2480 PetscScalar *vscale_array; 2481 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2482 Vec w1=coloring->w1,w2=coloring->w2,w3; 2483 void *fctx = coloring->fctx; 2484 PetscTruth flg = PETSC_FALSE; 2485 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2486 Vec x1_tmp; 2487 2488 PetscFunctionBegin; 2489 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2490 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2491 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2492 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2493 2494 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2495 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2496 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2497 if (flg) { 2498 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2499 } else { 2500 PetscTruth assembled; 2501 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2502 if (assembled) { 2503 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2504 } 2505 } 2506 2507 x1_tmp = x1; 2508 if (!coloring->vscale){ 2509 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2510 } 2511 2512 /* 2513 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2514 coloring->F for the coarser grids from the finest 2515 */ 2516 if (coloring->F) { 2517 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2518 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2519 if (m1 != m2) { 2520 coloring->F = 0; 2521 } 2522 } 2523 2524 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2525 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2526 } 2527 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2528 2529 /* Set w1 = F(x1) */ 2530 if (coloring->F) { 2531 w1 = coloring->F; /* use already computed value of function */ 2532 coloring->F = 0; 2533 } else { 2534 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2535 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2536 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2537 } 2538 2539 if (!coloring->w3) { 2540 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2541 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2542 } 2543 w3 = coloring->w3; 2544 2545 CHKMEMQ; 2546 /* Compute all the local scale factors, including ghost points */ 2547 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2548 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2549 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2550 if (ctype == IS_COLORING_GHOSTED){ 2551 col_start = 0; col_end = N; 2552 } else if (ctype == IS_COLORING_GLOBAL){ 2553 xx = xx - start; 2554 vscale_array = vscale_array - start; 2555 col_start = start; col_end = N + start; 2556 } CHKMEMQ; 2557 for (col=col_start; col<col_end; col++){ 2558 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2559 if (coloring->htype[0] == 'w') { 2560 dx = 1.0 + unorm; 2561 } else { 2562 dx = xx[col]; 2563 } 2564 if (dx == 0.0) dx = 1.0; 2565 #if !defined(PETSC_USE_COMPLEX) 2566 if (dx < umin && dx >= 0.0) dx = umin; 2567 else if (dx < 0.0 && dx > -umin) dx = -umin; 2568 #else 2569 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2570 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2571 #endif 2572 dx *= epsilon; 2573 vscale_array[col] = 1.0/dx; 2574 } CHKMEMQ; 2575 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2576 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2577 if (ctype == IS_COLORING_GLOBAL){ 2578 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2579 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2580 } 2581 CHKMEMQ; 2582 if (coloring->vscaleforrow) { 2583 vscaleforrow = coloring->vscaleforrow; 2584 } else { 2585 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2586 } 2587 2588 2589 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2590 /* 2591 Loop over each color 2592 */ 2593 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2594 for (k=0; k<coloring->ncolors; k++) { 2595 coloring->currentcolor = k; 2596 for (i=0; i<bs; i++) { 2597 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2598 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2599 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2600 /* 2601 Loop over each column associated with color 2602 adding the perturbation to the vector w3. 2603 */ 2604 for (l=0; l<coloring->ncolumns[k]; l++) { 2605 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2606 if (coloring->htype[0] == 'w') { 2607 dx = 1.0 + unorm; 2608 } else { 2609 dx = xx[col]; 2610 } 2611 if (dx == 0.0) dx = 1.0; 2612 #if !defined(PETSC_USE_COMPLEX) 2613 if (dx < umin && dx >= 0.0) dx = umin; 2614 else if (dx < 0.0 && dx > -umin) dx = -umin; 2615 #else 2616 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2617 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2618 #endif 2619 dx *= epsilon; 2620 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2621 w3_array[col] += dx; 2622 } 2623 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2624 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2625 2626 /* 2627 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2628 w2 = F(x1 + dx) - F(x1) 2629 */ 2630 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2631 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2632 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2633 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2634 2635 /* 2636 Loop over rows of vector, putting results into Jacobian matrix 2637 */ 2638 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2639 for (l=0; l<coloring->nrows[k]; l++) { 2640 row = bs*coloring->rows[k][l]; /* local row index */ 2641 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2642 for (j=0; j<bs; j++) { 2643 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2644 srows[j] = row + start + j; 2645 } 2646 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2647 } 2648 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2649 } 2650 } /* endof for each color */ 2651 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2652 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2653 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2654 ierr = PetscFree(srows);CHKERRQ(ierr); 2655 2656 coloring->currentcolor = -1; 2657 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2658 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2659 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2660 PetscFunctionReturn(0); 2661 } 2662 2663 /* -------------------------------------------------------------------*/ 2664 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2665 MatGetRow_SeqBAIJ, 2666 MatRestoreRow_SeqBAIJ, 2667 MatMult_SeqBAIJ_N, 2668 /* 4*/ MatMultAdd_SeqBAIJ_N, 2669 MatMultTranspose_SeqBAIJ, 2670 MatMultTransposeAdd_SeqBAIJ, 2671 0, 2672 0, 2673 0, 2674 /*10*/ 0, 2675 MatLUFactor_SeqBAIJ, 2676 0, 2677 0, 2678 MatTranspose_SeqBAIJ, 2679 /*15*/ MatGetInfo_SeqBAIJ, 2680 MatEqual_SeqBAIJ, 2681 MatGetDiagonal_SeqBAIJ, 2682 MatDiagonalScale_SeqBAIJ, 2683 MatNorm_SeqBAIJ, 2684 /*20*/ 0, 2685 MatAssemblyEnd_SeqBAIJ, 2686 MatSetOption_SeqBAIJ, 2687 MatZeroEntries_SeqBAIJ, 2688 /*24*/ MatZeroRows_SeqBAIJ, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2694 0, 2695 0, 2696 MatGetArray_SeqBAIJ, 2697 MatRestoreArray_SeqBAIJ, 2698 /*34*/ MatDuplicate_SeqBAIJ, 2699 0, 2700 0, 2701 MatILUFactor_SeqBAIJ, 2702 0, 2703 /*39*/ MatAXPY_SeqBAIJ, 2704 MatGetSubMatrices_SeqBAIJ, 2705 MatIncreaseOverlap_SeqBAIJ, 2706 MatGetValues_SeqBAIJ, 2707 MatCopy_SeqBAIJ, 2708 /*44*/ 0, 2709 MatScale_SeqBAIJ, 2710 0, 2711 0, 2712 MatILUDTFactor_SeqBAIJ, 2713 /*49*/ 0, 2714 MatGetRowIJ_SeqBAIJ, 2715 MatRestoreRowIJ_SeqBAIJ, 2716 MatGetColumnIJ_SeqBAIJ, 2717 MatRestoreColumnIJ_SeqBAIJ, 2718 /*54*/ MatFDColoringCreate_SeqAIJ, 2719 0, 2720 0, 2721 0, 2722 MatSetValuesBlocked_SeqBAIJ, 2723 /*59*/ MatGetSubMatrix_SeqBAIJ, 2724 MatDestroy_SeqBAIJ, 2725 MatView_SeqBAIJ, 2726 0, 2727 0, 2728 /*64*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2734 0, 2735 MatConvert_Basic, 2736 0, 2737 0, 2738 /*74*/ 0, 2739 MatFDColoringApply_BAIJ, 2740 0, 2741 0, 2742 0, 2743 /*79*/ 0, 2744 0, 2745 0, 2746 0, 2747 MatLoad_SeqBAIJ, 2748 /*84*/ 0, 2749 0, 2750 0, 2751 0, 2752 0, 2753 /*89*/ 0, 2754 0, 2755 0, 2756 0, 2757 0, 2758 /*94*/ 0, 2759 0, 2760 0, 2761 0, 2762 0, 2763 /*99*/0, 2764 0, 2765 0, 2766 0, 2767 0, 2768 /*104*/0, 2769 MatRealPart_SeqBAIJ, 2770 MatImaginaryPart_SeqBAIJ, 2771 0, 2772 0, 2773 /*109*/0, 2774 0, 2775 0, 2776 0, 2777 MatMissingDiagonal_SeqBAIJ 2778 /*114*/ 2779 }; 2780 2781 EXTERN_C_BEGIN 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2784 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2785 { 2786 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2787 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2788 PetscErrorCode ierr; 2789 2790 PetscFunctionBegin; 2791 if (aij->nonew != 1) { 2792 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2793 } 2794 2795 /* allocate space for values if not already there */ 2796 if (!aij->saved_values) { 2797 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2798 } 2799 2800 /* copy values over */ 2801 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2802 PetscFunctionReturn(0); 2803 } 2804 EXTERN_C_END 2805 2806 EXTERN_C_BEGIN 2807 #undef __FUNCT__ 2808 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2809 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2810 { 2811 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2812 PetscErrorCode ierr; 2813 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2814 2815 PetscFunctionBegin; 2816 if (aij->nonew != 1) { 2817 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2818 } 2819 if (!aij->saved_values) { 2820 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2821 } 2822 2823 /* copy values over */ 2824 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 EXTERN_C_END 2828 2829 EXTERN_C_BEGIN 2830 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2831 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2832 EXTERN_C_END 2833 2834 EXTERN_C_BEGIN 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2837 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2838 { 2839 Mat_SeqBAIJ *b; 2840 PetscErrorCode ierr; 2841 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2842 PetscTruth flg,skipallocation = PETSC_FALSE; 2843 2844 PetscFunctionBegin; 2845 2846 if (nz == MAT_SKIP_ALLOCATION) { 2847 skipallocation = PETSC_TRUE; 2848 nz = 0; 2849 } 2850 2851 if (bs < 0) { 2852 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2853 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2854 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2855 bs = PetscAbs(bs); 2856 } 2857 if (nnz && newbs != bs) { 2858 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2859 } 2860 bs = newbs; 2861 2862 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2863 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2864 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2865 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2866 2867 B->preallocated = PETSC_TRUE; 2868 2869 mbs = B->rmap->n/bs; 2870 nbs = B->cmap->n/bs; 2871 bs2 = bs*bs; 2872 2873 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2874 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2875 } 2876 2877 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2878 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2879 if (nnz) { 2880 for (i=0; i<mbs; i++) { 2881 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2882 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2883 } 2884 } 2885 2886 b = (Mat_SeqBAIJ*)B->data; 2887 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2888 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2889 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2890 2891 if (!flg) { 2892 switch (bs) { 2893 case 1: 2894 B->ops->mult = MatMult_SeqBAIJ_1; 2895 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2896 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2897 break; 2898 case 2: 2899 B->ops->mult = MatMult_SeqBAIJ_2; 2900 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2901 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2902 break; 2903 case 3: 2904 B->ops->mult = MatMult_SeqBAIJ_3; 2905 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2906 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2907 break; 2908 case 4: 2909 B->ops->mult = MatMult_SeqBAIJ_4; 2910 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2911 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2912 break; 2913 case 5: 2914 B->ops->mult = MatMult_SeqBAIJ_5; 2915 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2916 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2917 break; 2918 case 6: 2919 B->ops->mult = MatMult_SeqBAIJ_6; 2920 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2921 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2922 break; 2923 case 7: 2924 B->ops->mult = MatMult_SeqBAIJ_7; 2925 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2926 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2927 break; 2928 default: 2929 B->ops->mult = MatMult_SeqBAIJ_N; 2930 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2931 break; 2932 } 2933 } 2934 B->rmap->bs = bs; 2935 b->mbs = mbs; 2936 b->nbs = nbs; 2937 if (!skipallocation) { 2938 if (!b->imax) { 2939 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2940 } 2941 /* b->ilen will count nonzeros in each block row so far. */ 2942 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2943 if (!nnz) { 2944 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2945 else if (nz <= 0) nz = 1; 2946 for (i=0; i<mbs; i++) b->imax[i] = nz; 2947 nz = nz*mbs; 2948 } else { 2949 nz = 0; 2950 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2951 } 2952 2953 /* allocate the matrix space */ 2954 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2955 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2956 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2957 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2958 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2959 b->singlemalloc = PETSC_TRUE; 2960 b->i[0] = 0; 2961 for (i=1; i<mbs+1; i++) { 2962 b->i[i] = b->i[i-1] + b->imax[i-1]; 2963 } 2964 b->free_a = PETSC_TRUE; 2965 b->free_ij = PETSC_TRUE; 2966 } else { 2967 b->free_a = PETSC_FALSE; 2968 b->free_ij = PETSC_FALSE; 2969 } 2970 2971 B->rmap->bs = bs; 2972 b->bs2 = bs2; 2973 b->mbs = mbs; 2974 b->nz = 0; 2975 b->maxnz = nz*bs2; 2976 B->info.nz_unneeded = (PetscReal)b->maxnz; 2977 PetscFunctionReturn(0); 2978 } 2979 EXTERN_C_END 2980 2981 EXTERN_C_BEGIN 2982 #undef __FUNCT__ 2983 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2984 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2985 { 2986 PetscInt i,m,nz,nz_max=0,*nnz; 2987 PetscScalar *values=0; 2988 PetscErrorCode ierr; 2989 2990 PetscFunctionBegin; 2991 2992 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2993 2994 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2995 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2996 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2997 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2998 m = B->rmap->n/bs; 2999 3000 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3001 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3002 for(i=0; i<m; i++) { 3003 nz = ii[i+1]- ii[i]; 3004 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3005 nz_max = PetscMax(nz_max, nz); 3006 nnz[i] = nz; 3007 } 3008 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3009 ierr = PetscFree(nnz);CHKERRQ(ierr); 3010 3011 values = (PetscScalar*)V; 3012 if (!values) { 3013 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3014 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3015 } 3016 for (i=0; i<m; i++) { 3017 PetscInt ncols = ii[i+1] - ii[i]; 3018 const PetscInt *icols = jj + ii[i]; 3019 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3020 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3021 } 3022 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3023 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 3026 PetscFunctionReturn(0); 3027 } 3028 EXTERN_C_END 3029 3030 3031 EXTERN_C_BEGIN 3032 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3033 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3034 EXTERN_C_END 3035 3036 /*MC 3037 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3038 block sparse compressed row format. 3039 3040 Options Database Keys: 3041 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3042 3043 Level: beginner 3044 3045 .seealso: MatCreateSeqBAIJ() 3046 M*/ 3047 3048 3049 EXTERN_C_BEGIN 3050 #undef __FUNCT__ 3051 #define __FUNCT__ "MatCreate_SeqBAIJ" 3052 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3053 { 3054 PetscErrorCode ierr; 3055 PetscMPIInt size; 3056 Mat_SeqBAIJ *b; 3057 3058 PetscFunctionBegin; 3059 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3060 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3061 3062 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3063 B->data = (void*)b; 3064 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3065 B->mapping = 0; 3066 b->row = 0; 3067 b->col = 0; 3068 b->icol = 0; 3069 b->reallocs = 0; 3070 b->saved_values = 0; 3071 3072 b->roworiented = PETSC_TRUE; 3073 b->nonew = 0; 3074 b->diag = 0; 3075 b->solve_work = 0; 3076 b->mult_work = 0; 3077 B->spptr = 0; 3078 B->info.nz_unneeded = (PetscReal)b->maxnz; 3079 b->keepnonzeropattern = PETSC_FALSE; 3080 b->xtoy = 0; 3081 b->XtoY = 0; 3082 b->compressedrow.use = PETSC_FALSE; 3083 b->compressedrow.nrows = 0; 3084 b->compressedrow.i = PETSC_NULL; 3085 b->compressedrow.rindex = PETSC_NULL; 3086 b->compressedrow.checked = PETSC_FALSE; 3087 B->same_nonzero = PETSC_FALSE; 3088 3089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 3090 "MatGetFactorAvailable_seqbaij_petsc", 3091 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3092 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 3093 "MatGetFactor_seqbaij_petsc", 3094 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3096 "MatInvertBlockDiagonal_SeqBAIJ", 3097 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3098 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3099 "MatStoreValues_SeqBAIJ", 3100 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3102 "MatRetrieveValues_SeqBAIJ", 3103 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3104 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3105 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3106 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3108 "MatConvert_SeqBAIJ_SeqAIJ", 3109 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3110 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3111 "MatConvert_SeqBAIJ_SeqSBAIJ", 3112 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3113 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3114 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3115 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3116 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3117 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3118 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3119 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3120 PetscFunctionReturn(0); 3121 } 3122 EXTERN_C_END 3123 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3126 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues) 3127 { 3128 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3129 PetscErrorCode ierr; 3130 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3131 3132 PetscFunctionBegin; 3133 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3134 3135 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3136 for (i=0; i<mbs; i++) { 3137 c->imax[i] = a->imax[i]; 3138 c->ilen[i] = a->ilen[i]; 3139 } 3140 3141 /* allocate the matrix space */ 3142 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3143 c->singlemalloc = PETSC_TRUE; 3144 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3145 if (mbs > 0) { 3146 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3147 if (cpvalues == MAT_COPY_VALUES) { 3148 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3149 } else { 3150 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3151 } 3152 } 3153 c->roworiented = a->roworiented; 3154 c->nonew = a->nonew; 3155 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 3156 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 3157 c->bs2 = a->bs2; 3158 c->mbs = a->mbs; 3159 c->nbs = a->nbs; 3160 3161 if (a->diag) { 3162 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3163 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3164 for (i=0; i<mbs; i++) { 3165 c->diag[i] = a->diag[i]; 3166 } 3167 } else c->diag = 0; 3168 c->nz = a->nz; 3169 c->maxnz = a->maxnz; 3170 c->solve_work = 0; 3171 c->mult_work = 0; 3172 c->free_a = PETSC_TRUE; 3173 c->free_ij = PETSC_TRUE; 3174 C->preallocated = PETSC_TRUE; 3175 C->assembled = PETSC_TRUE; 3176 3177 c->compressedrow.use = a->compressedrow.use; 3178 c->compressedrow.nrows = a->compressedrow.nrows; 3179 c->compressedrow.checked = a->compressedrow.checked; 3180 if ( a->compressedrow.checked && a->compressedrow.use){ 3181 i = a->compressedrow.nrows; 3182 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 3183 c->compressedrow.rindex = c->compressedrow.i + i + 1; 3184 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3185 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3186 } else { 3187 c->compressedrow.use = PETSC_FALSE; 3188 c->compressedrow.i = PETSC_NULL; 3189 c->compressedrow.rindex = PETSC_NULL; 3190 } 3191 C->same_nonzero = A->same_nonzero; 3192 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3193 PetscFunctionReturn(0); 3194 } 3195 3196 #undef __FUNCT__ 3197 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3198 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3199 { 3200 PetscErrorCode ierr; 3201 3202 PetscFunctionBegin; 3203 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3204 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3205 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3206 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues); 3207 PetscFunctionReturn(0); 3208 } 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "MatLoad_SeqBAIJ" 3212 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3213 { 3214 Mat_SeqBAIJ *a; 3215 Mat B; 3216 PetscErrorCode ierr; 3217 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3218 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3219 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3220 PetscInt *masked,nmask,tmp,bs2,ishift; 3221 PetscMPIInt size; 3222 int fd; 3223 PetscScalar *aa; 3224 MPI_Comm comm = ((PetscObject)viewer)->comm; 3225 3226 PetscFunctionBegin; 3227 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3228 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3229 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3230 bs2 = bs*bs; 3231 3232 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3233 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3234 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3235 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3236 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3237 M = header[1]; N = header[2]; nz = header[3]; 3238 3239 if (header[3] < 0) { 3240 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3241 } 3242 3243 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3244 3245 /* 3246 This code adds extra rows to make sure the number of rows is 3247 divisible by the blocksize 3248 */ 3249 mbs = M/bs; 3250 extra_rows = bs - M + bs*(mbs); 3251 if (extra_rows == bs) extra_rows = 0; 3252 else mbs++; 3253 if (extra_rows) { 3254 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3255 } 3256 3257 /* read in row lengths */ 3258 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3259 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3260 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3261 3262 /* read in column indices */ 3263 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3264 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3265 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3266 3267 /* loop over row lengths determining block row lengths */ 3268 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3269 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3270 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3271 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3272 masked = mask + mbs; 3273 rowcount = 0; nzcount = 0; 3274 for (i=0; i<mbs; i++) { 3275 nmask = 0; 3276 for (j=0; j<bs; j++) { 3277 kmax = rowlengths[rowcount]; 3278 for (k=0; k<kmax; k++) { 3279 tmp = jj[nzcount++]/bs; 3280 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3281 } 3282 rowcount++; 3283 } 3284 browlengths[i] += nmask; 3285 /* zero out the mask elements we set */ 3286 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3287 } 3288 3289 /* create our matrix */ 3290 ierr = MatCreate(comm,&B); 3291 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3292 ierr = MatSetType(B,type);CHKERRQ(ierr); 3293 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3294 a = (Mat_SeqBAIJ*)B->data; 3295 3296 /* set matrix "i" values */ 3297 a->i[0] = 0; 3298 for (i=1; i<= mbs; i++) { 3299 a->i[i] = a->i[i-1] + browlengths[i-1]; 3300 a->ilen[i-1] = browlengths[i-1]; 3301 } 3302 a->nz = 0; 3303 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3304 3305 /* read in nonzero values */ 3306 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3307 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3308 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3309 3310 /* set "a" and "j" values into matrix */ 3311 nzcount = 0; jcount = 0; 3312 for (i=0; i<mbs; i++) { 3313 nzcountb = nzcount; 3314 nmask = 0; 3315 for (j=0; j<bs; j++) { 3316 kmax = rowlengths[i*bs+j]; 3317 for (k=0; k<kmax; k++) { 3318 tmp = jj[nzcount++]/bs; 3319 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3320 } 3321 } 3322 /* sort the masked values */ 3323 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3324 3325 /* set "j" values into matrix */ 3326 maskcount = 1; 3327 for (j=0; j<nmask; j++) { 3328 a->j[jcount++] = masked[j]; 3329 mask[masked[j]] = maskcount++; 3330 } 3331 /* set "a" values into matrix */ 3332 ishift = bs2*a->i[i]; 3333 for (j=0; j<bs; j++) { 3334 kmax = rowlengths[i*bs+j]; 3335 for (k=0; k<kmax; k++) { 3336 tmp = jj[nzcountb]/bs ; 3337 block = mask[tmp] - 1; 3338 point = jj[nzcountb] - bs*tmp; 3339 idx = ishift + bs2*block + j + bs*point; 3340 a->a[idx] = (MatScalar)aa[nzcountb++]; 3341 } 3342 } 3343 /* zero out the mask elements we set */ 3344 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3345 } 3346 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3347 3348 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3349 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3350 ierr = PetscFree(aa);CHKERRQ(ierr); 3351 ierr = PetscFree(jj);CHKERRQ(ierr); 3352 ierr = PetscFree(mask);CHKERRQ(ierr); 3353 3354 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3355 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3356 ierr = MatView_Private(B);CHKERRQ(ierr); 3357 3358 *A = B; 3359 PetscFunctionReturn(0); 3360 } 3361 3362 #undef __FUNCT__ 3363 #define __FUNCT__ "MatCreateSeqBAIJ" 3364 /*@C 3365 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3366 compressed row) format. For good matrix assembly performance the 3367 user should preallocate the matrix storage by setting the parameter nz 3368 (or the array nnz). By setting these parameters accurately, performance 3369 during matrix assembly can be increased by more than a factor of 50. 3370 3371 Collective on MPI_Comm 3372 3373 Input Parameters: 3374 + comm - MPI communicator, set to PETSC_COMM_SELF 3375 . bs - size of block 3376 . m - number of rows 3377 . n - number of columns 3378 . nz - number of nonzero blocks per block row (same for all rows) 3379 - nnz - array containing the number of nonzero blocks in the various block rows 3380 (possibly different for each block row) or PETSC_NULL 3381 3382 Output Parameter: 3383 . A - the matrix 3384 3385 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3386 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3387 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3388 3389 Options Database Keys: 3390 . -mat_no_unroll - uses code that does not unroll the loops in the 3391 block calculations (much slower) 3392 . -mat_block_size - size of the blocks to use 3393 3394 Level: intermediate 3395 3396 Notes: 3397 The number of rows and columns must be divisible by blocksize. 3398 3399 If the nnz parameter is given then the nz parameter is ignored 3400 3401 A nonzero block is any block that as 1 or more nonzeros in it 3402 3403 The block AIJ format is fully compatible with standard Fortran 77 3404 storage. That is, the stored row and column indices can begin at 3405 either one (as in Fortran) or zero. See the users' manual for details. 3406 3407 Specify the preallocated storage with either nz or nnz (not both). 3408 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3409 allocation. For additional details, see the users manual chapter on 3410 matrices. 3411 3412 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3413 @*/ 3414 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3415 { 3416 PetscErrorCode ierr; 3417 3418 PetscFunctionBegin; 3419 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3420 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3421 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3422 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3423 PetscFunctionReturn(0); 3424 } 3425 3426 #undef __FUNCT__ 3427 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3428 /*@C 3429 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3430 per row in the matrix. For good matrix assembly performance the 3431 user should preallocate the matrix storage by setting the parameter nz 3432 (or the array nnz). By setting these parameters accurately, performance 3433 during matrix assembly can be increased by more than a factor of 50. 3434 3435 Collective on MPI_Comm 3436 3437 Input Parameters: 3438 + A - the matrix 3439 . bs - size of block 3440 . nz - number of block nonzeros per block row (same for all rows) 3441 - nnz - array containing the number of block nonzeros in the various block rows 3442 (possibly different for each block row) or PETSC_NULL 3443 3444 Options Database Keys: 3445 . -mat_no_unroll - uses code that does not unroll the loops in the 3446 block calculations (much slower) 3447 . -mat_block_size - size of the blocks to use 3448 3449 Level: intermediate 3450 3451 Notes: 3452 If the nnz parameter is given then the nz parameter is ignored 3453 3454 You can call MatGetInfo() to get information on how effective the preallocation was; 3455 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3456 You can also run with the option -info and look for messages with the string 3457 malloc in them to see if additional memory allocation was needed. 3458 3459 The block AIJ format is fully compatible with standard Fortran 77 3460 storage. That is, the stored row and column indices can begin at 3461 either one (as in Fortran) or zero. See the users' manual for details. 3462 3463 Specify the preallocated storage with either nz or nnz (not both). 3464 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3465 allocation. For additional details, see the users manual chapter on 3466 matrices. 3467 3468 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3469 @*/ 3470 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3471 { 3472 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3473 3474 PetscFunctionBegin; 3475 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3476 if (f) { 3477 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3478 } 3479 PetscFunctionReturn(0); 3480 } 3481 3482 #undef __FUNCT__ 3483 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3484 /*@C 3485 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3486 (the default sequential PETSc format). 3487 3488 Collective on MPI_Comm 3489 3490 Input Parameters: 3491 + A - the matrix 3492 . i - the indices into j for the start of each local row (starts with zero) 3493 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3494 - v - optional values in the matrix 3495 3496 Level: developer 3497 3498 .keywords: matrix, aij, compressed row, sparse 3499 3500 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3501 @*/ 3502 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3503 { 3504 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3505 3506 PetscFunctionBegin; 3507 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3508 if (f) { 3509 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3510 } 3511 PetscFunctionReturn(0); 3512 } 3513 3514 3515 #undef __FUNCT__ 3516 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3517 /*@ 3518 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3519 (upper triangular entries in CSR format) provided by the user. 3520 3521 Collective on MPI_Comm 3522 3523 Input Parameters: 3524 + comm - must be an MPI communicator of size 1 3525 . bs - size of block 3526 . m - number of rows 3527 . n - number of columns 3528 . i - row indices 3529 . j - column indices 3530 - a - matrix values 3531 3532 Output Parameter: 3533 . mat - the matrix 3534 3535 Level: intermediate 3536 3537 Notes: 3538 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3539 once the matrix is destroyed 3540 3541 You cannot set new nonzero locations into this matrix, that will generate an error. 3542 3543 The i and j indices are 0 based 3544 3545 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3546 3547 @*/ 3548 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3549 { 3550 PetscErrorCode ierr; 3551 PetscInt ii; 3552 Mat_SeqBAIJ *baij; 3553 3554 PetscFunctionBegin; 3555 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3556 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3557 3558 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3559 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3560 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3561 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3562 baij = (Mat_SeqBAIJ*)(*mat)->data; 3563 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3564 3565 baij->i = i; 3566 baij->j = j; 3567 baij->a = a; 3568 baij->singlemalloc = PETSC_FALSE; 3569 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3570 baij->free_a = PETSC_FALSE; 3571 baij->free_ij = PETSC_FALSE; 3572 3573 for (ii=0; ii<m; ii++) { 3574 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3575 #if defined(PETSC_USE_DEBUG) 3576 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3577 #endif 3578 } 3579 #if defined(PETSC_USE_DEBUG) 3580 for (ii=0; ii<baij->i[m]; ii++) { 3581 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3582 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3583 } 3584 #endif 3585 3586 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3587 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3588 PetscFunctionReturn(0); 3589 } 3590