1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 default: 129 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 PetscScalar *x,x1,s1; 142 const PetscScalar *b; 143 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 144 PetscErrorCode ierr; 145 PetscInt m = a->mbs,i,i2,nz,idx; 146 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 147 148 PetscFunctionBegin; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 its = its*lits; 151 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 152 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 153 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 154 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 155 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 156 157 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 158 159 diag = a->diag; 160 idiag = a->idiag; 161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 162 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 163 164 if (flag & SOR_ZERO_INITIAL_GUESS) { 165 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 166 x[0] = b[0]*idiag[0]; 167 i2 = 1; 168 idiag += 1; 169 for (i=1; i<m; i++) { 170 v = aa + ai[i]; 171 vi = aj + ai[i]; 172 nz = diag[i] - ai[i]; 173 s1 = b[i2]; 174 while (nz--) { 175 idx = (*vi++); 176 x1 = x[idx]; 177 s1 -= v[0]*x1; 178 v += 1; 179 } 180 x[i2] = idiag[0]*s1; 181 idiag += 1; 182 i2 += 1; 183 } 184 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 185 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 186 } 187 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 188 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 189 i2 = 0; 190 mdiag = a->idiag+a->mbs; 191 for (i=0; i<m; i++) { 192 x1 = x[i2]; 193 x[i2] = mdiag[0]*x1; 194 mdiag += 1; 195 i2 += 1; 196 } 197 ierr = PetscLogFlops(m);CHKERRQ(ierr); 198 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 199 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 200 } 201 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 202 idiag = a->idiag+a->mbs - 1; 203 i2 = m - 1; 204 x1 = x[i2]; 205 x[i2] = idiag[0]*x1; 206 idiag -= 1; 207 i2 -= 1; 208 for (i=m-2; i>=0; i--) { 209 v = aa + (diag[i]+1); 210 vi = aj + diag[i] + 1; 211 nz = ai[i+1] - diag[i] - 1; 212 s1 = x[i2]; 213 while (nz--) { 214 idx = (*vi++); 215 x1 = x[idx]; 216 s1 -= v[0]*x1; 217 v += 1; 218 } 219 x[i2] = idiag[0]*s1; 220 idiag -= 1; 221 i2 -= 1; 222 } 223 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 224 } 225 } else { 226 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 227 } 228 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 229 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 236 { 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238 PetscScalar *x,x1,x2,s1,s2; 239 const PetscScalar *b; 240 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 241 PetscErrorCode ierr; 242 PetscInt m = a->mbs,i,i2,nz,idx; 243 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 244 245 PetscFunctionBegin; 246 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 247 its = its*lits; 248 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 249 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 250 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 251 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 252 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 253 254 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 255 256 diag = a->diag; 257 idiag = a->idiag; 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 260 261 if (flag & SOR_ZERO_INITIAL_GUESS) { 262 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 263 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 264 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 265 i2 = 2; 266 idiag += 4; 267 for (i=1; i<m; i++) { 268 v = aa + 4*ai[i]; 269 vi = aj + ai[i]; 270 nz = diag[i] - ai[i]; 271 s1 = b[i2]; s2 = b[i2+1]; 272 while (nz--) { 273 idx = 2*(*vi++); 274 x1 = x[idx]; x2 = x[1+idx]; 275 s1 -= v[0]*x1 + v[2]*x2; 276 s2 -= v[1]*x1 + v[3]*x2; 277 v += 4; 278 } 279 x[i2] = idiag[0]*s1 + idiag[2]*s2; 280 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 281 idiag += 4; 282 i2 += 2; 283 } 284 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 285 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 286 } 287 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 288 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 289 i2 = 0; 290 mdiag = a->idiag+4*a->mbs; 291 for (i=0; i<m; i++) { 292 x1 = x[i2]; x2 = x[i2+1]; 293 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 294 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 295 mdiag += 4; 296 i2 += 2; 297 } 298 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 299 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 300 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 301 } 302 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 303 idiag = a->idiag+4*a->mbs - 4; 304 i2 = 2*m - 2; 305 x1 = x[i2]; x2 = x[i2+1]; 306 x[i2] = idiag[0]*x1 + idiag[2]*x2; 307 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 308 idiag -= 4; 309 i2 -= 2; 310 for (i=m-2; i>=0; i--) { 311 v = aa + 4*(diag[i]+1); 312 vi = aj + diag[i] + 1; 313 nz = ai[i+1] - diag[i] - 1; 314 s1 = x[i2]; s2 = x[i2+1]; 315 while (nz--) { 316 idx = 2*(*vi++); 317 x1 = x[idx]; x2 = x[1+idx]; 318 s1 -= v[0]*x1 + v[2]*x2; 319 s2 -= v[1]*x1 + v[3]*x2; 320 v += 4; 321 } 322 x[i2] = idiag[0]*s1 + idiag[2]*s2; 323 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 324 idiag -= 4; 325 i2 -= 2; 326 } 327 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 328 } 329 } else { 330 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 331 } 332 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 333 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 340 { 341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 342 PetscScalar *x,x1,x2,x3,s1,s2,s3; 343 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 344 const PetscScalar *b; 345 PetscErrorCode ierr; 346 PetscInt m = a->mbs,i,i2,nz,idx; 347 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 348 349 PetscFunctionBegin; 350 its = its*lits; 351 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 352 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 353 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 354 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 355 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 356 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 357 358 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 359 360 diag = a->diag; 361 idiag = a->idiag; 362 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 363 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 364 365 if (flag & SOR_ZERO_INITIAL_GUESS) { 366 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 367 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 368 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 369 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 370 i2 = 3; 371 idiag += 9; 372 for (i=1; i<m; i++) { 373 v = aa + 9*ai[i]; 374 vi = aj + ai[i]; 375 nz = diag[i] - ai[i]; 376 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 377 while (nz--) { 378 idx = 3*(*vi++); 379 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 380 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 381 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 382 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 383 v += 9; 384 } 385 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 386 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 387 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 388 idiag += 9; 389 i2 += 3; 390 } 391 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 392 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 393 } 394 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 395 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 396 i2 = 0; 397 mdiag = a->idiag+9*a->mbs; 398 for (i=0; i<m; i++) { 399 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 400 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 401 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 402 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 403 mdiag += 9; 404 i2 += 3; 405 } 406 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 407 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 408 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 409 } 410 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 411 idiag = a->idiag+9*a->mbs - 9; 412 i2 = 3*m - 3; 413 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 414 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 415 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 416 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 417 idiag -= 9; 418 i2 -= 3; 419 for (i=m-2; i>=0; i--) { 420 v = aa + 9*(diag[i]+1); 421 vi = aj + diag[i] + 1; 422 nz = ai[i+1] - diag[i] - 1; 423 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 424 while (nz--) { 425 idx = 3*(*vi++); 426 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 427 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 428 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 429 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 430 v += 9; 431 } 432 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 433 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 434 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 435 idiag -= 9; 436 i2 -= 3; 437 } 438 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 439 } 440 } else { 441 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 442 } 443 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 451 { 452 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 453 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 454 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 455 const PetscScalar *b; 456 PetscErrorCode ierr; 457 PetscInt m = a->mbs,i,i2,nz,idx; 458 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 459 460 PetscFunctionBegin; 461 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 462 its = its*lits; 463 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 464 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 465 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 466 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 467 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 468 469 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 470 471 diag = a->diag; 472 idiag = a->idiag; 473 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 474 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 475 476 if (flag & SOR_ZERO_INITIAL_GUESS) { 477 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 478 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 479 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 480 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 481 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 482 i2 = 4; 483 idiag += 16; 484 for (i=1; i<m; i++) { 485 v = aa + 16*ai[i]; 486 vi = aj + ai[i]; 487 nz = diag[i] - ai[i]; 488 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 489 while (nz--) { 490 idx = 4*(*vi++); 491 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 492 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 493 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 494 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 495 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 496 v += 16; 497 } 498 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 499 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 500 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 501 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 502 idiag += 16; 503 i2 += 4; 504 } 505 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 507 } 508 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510 i2 = 0; 511 mdiag = a->idiag+16*a->mbs; 512 for (i=0; i<m; i++) { 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 515 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 516 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 517 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 518 mdiag += 16; 519 i2 += 4; 520 } 521 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 522 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 523 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 524 } 525 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 526 idiag = a->idiag+16*a->mbs - 16; 527 i2 = 4*m - 4; 528 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 529 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 530 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 531 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 532 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 533 idiag -= 16; 534 i2 -= 4; 535 for (i=m-2; i>=0; i--) { 536 v = aa + 16*(diag[i]+1); 537 vi = aj + diag[i] + 1; 538 nz = ai[i+1] - diag[i] - 1; 539 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 540 while (nz--) { 541 idx = 4*(*vi++); 542 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 543 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 544 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 545 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 546 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 547 v += 16; 548 } 549 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 550 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 551 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 552 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 553 idiag -= 16; 554 i2 -= 4; 555 } 556 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 569 { 570 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 571 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 572 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 573 const PetscScalar *b; 574 PetscErrorCode ierr; 575 PetscInt m = a->mbs,i,i2,nz,idx; 576 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 577 578 PetscFunctionBegin; 579 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 580 its = its*lits; 581 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 582 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 583 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 584 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 585 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 586 587 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 588 589 diag = a->diag; 590 idiag = a->idiag; 591 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 592 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 593 594 if (flag & SOR_ZERO_INITIAL_GUESS) { 595 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 596 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 597 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 598 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 599 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 600 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 601 i2 = 5; 602 idiag += 25; 603 for (i=1; i<m; i++) { 604 v = aa + 25*ai[i]; 605 vi = aj + ai[i]; 606 nz = diag[i] - ai[i]; 607 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 608 while (nz--) { 609 idx = 5*(*vi++); 610 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 611 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 612 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 613 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 614 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 615 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 616 v += 25; 617 } 618 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 619 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 620 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 621 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 622 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 623 idiag += 25; 624 i2 += 5; 625 } 626 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 627 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 628 } 629 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 630 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 631 i2 = 0; 632 mdiag = a->idiag+25*a->mbs; 633 for (i=0; i<m; i++) { 634 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 635 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 636 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 637 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 638 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 639 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 640 mdiag += 25; 641 i2 += 5; 642 } 643 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 644 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 645 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 646 } 647 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 648 idiag = a->idiag+25*a->mbs - 25; 649 i2 = 5*m - 5; 650 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 651 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 652 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 653 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 654 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 655 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 656 idiag -= 25; 657 i2 -= 5; 658 for (i=m-2; i>=0; i--) { 659 v = aa + 25*(diag[i]+1); 660 vi = aj + diag[i] + 1; 661 nz = ai[i+1] - diag[i] - 1; 662 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 663 while (nz--) { 664 idx = 5*(*vi++); 665 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 666 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 667 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 668 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 669 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 670 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 671 v += 25; 672 } 673 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 674 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 675 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 676 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 677 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 678 idiag -= 25; 679 i2 -= 5; 680 } 681 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 682 } 683 } else { 684 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 685 } 686 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 694 { 695 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 696 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 697 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 698 const PetscScalar *b; 699 PetscErrorCode ierr; 700 PetscInt m = a->mbs,i,i2,nz,idx; 701 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 702 703 PetscFunctionBegin; 704 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 705 its = its*lits; 706 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 707 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 708 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 709 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 710 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 711 712 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 713 714 diag = a->diag; 715 idiag = a->idiag; 716 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 717 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 718 719 if (flag & SOR_ZERO_INITIAL_GUESS) { 720 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 721 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 722 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 723 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 724 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 725 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 726 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 727 i2 = 6; 728 idiag += 36; 729 for (i=1; i<m; i++) { 730 v = aa + 36*ai[i]; 731 vi = aj + ai[i]; 732 nz = diag[i] - ai[i]; 733 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 734 while (nz--) { 735 idx = 6*(*vi++); 736 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 737 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 738 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 739 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 740 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 741 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 742 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 743 v += 36; 744 } 745 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 746 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 747 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 748 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 749 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 750 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 751 idiag += 36; 752 i2 += 6; 753 } 754 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 755 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 756 } 757 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 758 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 759 i2 = 0; 760 mdiag = a->idiag+36*a->mbs; 761 for (i=0; i<m; i++) { 762 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 763 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 764 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 765 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 766 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 767 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 768 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 769 mdiag += 36; 770 i2 += 6; 771 } 772 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 773 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 774 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 775 } 776 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 777 idiag = a->idiag+36*a->mbs - 36; 778 i2 = 6*m - 6; 779 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 780 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 781 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 782 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 783 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 784 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 785 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 786 idiag -= 36; 787 i2 -= 6; 788 for (i=m-2; i>=0; i--) { 789 v = aa + 36*(diag[i]+1); 790 vi = aj + diag[i] + 1; 791 nz = ai[i+1] - diag[i] - 1; 792 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 793 while (nz--) { 794 idx = 6*(*vi++); 795 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 796 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 797 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 798 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 799 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 800 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 801 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 802 v += 36; 803 } 804 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 805 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 806 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 807 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 808 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 809 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 810 idiag -= 36; 811 i2 -= 6; 812 } 813 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 814 } 815 } else { 816 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 817 } 818 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 819 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 826 { 827 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 828 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 829 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 830 const PetscScalar *b; 831 PetscErrorCode ierr; 832 PetscInt m = a->mbs,i,i2,nz,idx; 833 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 834 835 PetscFunctionBegin; 836 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 837 its = its*lits; 838 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 839 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 840 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 841 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 842 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 843 844 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 845 846 diag = a->diag; 847 idiag = a->idiag; 848 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 849 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 850 851 if (flag & SOR_ZERO_INITIAL_GUESS) { 852 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 853 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 854 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 855 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 856 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 857 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 858 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 859 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 860 i2 = 7; 861 idiag += 49; 862 for (i=1; i<m; i++) { 863 v = aa + 49*ai[i]; 864 vi = aj + ai[i]; 865 nz = diag[i] - ai[i]; 866 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 867 while (nz--) { 868 idx = 7*(*vi++); 869 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 870 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 871 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 872 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 873 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 874 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 875 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 876 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 877 v += 49; 878 } 879 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 880 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 881 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 882 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 883 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 884 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 885 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 886 idiag += 49; 887 i2 += 7; 888 } 889 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 890 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 891 } 892 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 893 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 894 i2 = 0; 895 mdiag = a->idiag+49*a->mbs; 896 for (i=0; i<m; i++) { 897 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 898 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 899 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 900 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 901 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 902 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 903 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 904 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 905 mdiag += 36; 906 i2 += 6; 907 } 908 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 909 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 910 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 911 } 912 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 913 idiag = a->idiag+49*a->mbs - 49; 914 i2 = 7*m - 7; 915 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 916 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 917 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 918 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 919 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 920 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 921 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 922 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 923 idiag -= 49; 924 i2 -= 7; 925 for (i=m-2; i>=0; i--) { 926 v = aa + 49*(diag[i]+1); 927 vi = aj + diag[i] + 1; 928 nz = ai[i+1] - diag[i] - 1; 929 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 930 while (nz--) { 931 idx = 7*(*vi++); 932 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 933 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 934 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 935 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 936 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 937 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 938 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 939 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 940 v += 49; 941 } 942 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 943 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 944 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 945 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 946 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 947 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 948 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 949 idiag -= 49; 950 i2 -= 7; 951 } 952 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 953 } 954 } else { 955 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 956 } 957 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 958 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 /* 963 Special version for direct calls from Fortran (Used in PETSc-fun3d) 964 */ 965 #if defined(PETSC_HAVE_FORTRAN_CAPS) 966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 968 #define matsetvaluesblocked4_ matsetvaluesblocked4 969 #endif 970 971 EXTERN_C_BEGIN 972 #undef __FUNCT__ 973 #define __FUNCT__ "matsetvaluesblocked4_" 974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 975 { 976 Mat A = *AA; 977 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 978 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 979 PetscInt *ai=a->i,*ailen=a->ilen; 980 PetscInt *aj=a->j,stepval,lastcol = -1; 981 const PetscScalar *value = v; 982 MatScalar *ap,*aa = a->a,*bap; 983 984 PetscFunctionBegin; 985 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 986 stepval = (n-1)*4; 987 for (k=0; k<m; k++) { /* loop over added rows */ 988 row = im[k]; 989 rp = aj + ai[row]; 990 ap = aa + 16*ai[row]; 991 nrow = ailen[row]; 992 low = 0; 993 high = nrow; 994 for (l=0; l<n; l++) { /* loop over added columns */ 995 col = in[l]; 996 if (col <= lastcol) low = 0; else high = nrow; 997 lastcol = col; 998 value = v + k*(stepval+4 + l)*4; 999 while (high-low > 7) { 1000 t = (low+high)/2; 1001 if (rp[t] > col) high = t; 1002 else low = t; 1003 } 1004 for (i=low; i<high; i++) { 1005 if (rp[i] > col) break; 1006 if (rp[i] == col) { 1007 bap = ap + 16*i; 1008 for (ii=0; ii<4; ii++,value+=stepval) { 1009 for (jj=ii; jj<16; jj+=4) { 1010 bap[jj] += *value++; 1011 } 1012 } 1013 goto noinsert2; 1014 } 1015 } 1016 N = nrow++ - 1; 1017 high++; /* added new column index thus must search to one higher than before */ 1018 /* shift up all the later entries in this row */ 1019 for (ii=N; ii>=i; ii--) { 1020 rp[ii+1] = rp[ii]; 1021 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1022 } 1023 if (N >= i) { 1024 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1025 } 1026 rp[i] = col; 1027 bap = ap + 16*i; 1028 for (ii=0; ii<4; ii++,value+=stepval) { 1029 for (jj=ii; jj<16; jj+=4) { 1030 bap[jj] = *value++; 1031 } 1032 } 1033 noinsert2:; 1034 low = i; 1035 } 1036 ailen[row] = nrow; 1037 } 1038 PetscFunctionReturnVoid(); 1039 } 1040 EXTERN_C_END 1041 1042 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1043 #define matsetvalues4_ MATSETVALUES4 1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1045 #define matsetvalues4_ matsetvalues4 1046 #endif 1047 1048 EXTERN_C_BEGIN 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatSetValues4_" 1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1052 { 1053 Mat A = *AA; 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1056 PetscInt *ai=a->i,*ailen=a->ilen; 1057 PetscInt *aj=a->j,brow,bcol; 1058 PetscInt ridx,cidx,lastcol = -1; 1059 MatScalar *ap,value,*aa=a->a,*bap; 1060 1061 PetscFunctionBegin; 1062 for (k=0; k<m; k++) { /* loop over added rows */ 1063 row = im[k]; brow = row/4; 1064 rp = aj + ai[brow]; 1065 ap = aa + 16*ai[brow]; 1066 nrow = ailen[brow]; 1067 low = 0; 1068 high = nrow; 1069 for (l=0; l<n; l++) { /* loop over added columns */ 1070 col = in[l]; bcol = col/4; 1071 ridx = row % 4; cidx = col % 4; 1072 value = v[l + k*n]; 1073 if (col <= lastcol) low = 0; else high = nrow; 1074 lastcol = col; 1075 while (high-low > 7) { 1076 t = (low+high)/2; 1077 if (rp[t] > bcol) high = t; 1078 else low = t; 1079 } 1080 for (i=low; i<high; i++) { 1081 if (rp[i] > bcol) break; 1082 if (rp[i] == bcol) { 1083 bap = ap + 16*i + 4*cidx + ridx; 1084 *bap += value; 1085 goto noinsert1; 1086 } 1087 } 1088 N = nrow++ - 1; 1089 high++; /* added new column thus must search to one higher than before */ 1090 /* shift up all the later entries in this row */ 1091 for (ii=N; ii>=i; ii--) { 1092 rp[ii+1] = rp[ii]; 1093 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1094 } 1095 if (N>=i) { 1096 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1097 } 1098 rp[i] = bcol; 1099 ap[16*i + 4*cidx + ridx] = value; 1100 noinsert1:; 1101 low = i; 1102 } 1103 ailen[brow] = nrow; 1104 } 1105 PetscFunctionReturnVoid(); 1106 } 1107 EXTERN_C_END 1108 1109 /* 1110 Checks for missing diagonals 1111 */ 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1117 PetscErrorCode ierr; 1118 PetscInt *diag,*jj = a->j,i; 1119 1120 PetscFunctionBegin; 1121 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1122 *missing = PETSC_FALSE; 1123 if (A->rmap->n > 0 && !jj) { 1124 *missing = PETSC_TRUE; 1125 if (d) *d = 0; 1126 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1127 } else { 1128 diag = a->diag; 1129 for (i=0; i<a->mbs; i++) { 1130 if (jj[diag[i]] != i) { 1131 *missing = PETSC_TRUE; 1132 if (d) *d = i; 1133 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1134 } 1135 } 1136 } 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1143 { 1144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1145 PetscErrorCode ierr; 1146 PetscInt i,j,m = a->mbs; 1147 1148 PetscFunctionBegin; 1149 if (!a->diag) { 1150 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1151 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1152 a->free_diag = PETSC_TRUE; 1153 } 1154 for (i=0; i<m; i++) { 1155 a->diag[i] = a->i[i+1]; 1156 for (j=a->i[i]; j<a->i[i+1]; j++) { 1157 if (a->j[j] == i) { 1158 a->diag[i] = j; 1159 break; 1160 } 1161 } 1162 } 1163 PetscFunctionReturn(0); 1164 } 1165 1166 1167 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1171 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1172 { 1173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1174 PetscErrorCode ierr; 1175 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1176 PetscInt *tia, *tja; 1177 1178 PetscFunctionBegin; 1179 *nn = n; 1180 if (!ia) PetscFunctionReturn(0); 1181 if (symmetric) { 1182 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1183 } else { 1184 tia = a->i; tja = a->j; 1185 } 1186 1187 if (!blockcompressed && bs > 1) { 1188 (*nn) *= bs; 1189 nbs = bs; 1190 /* malloc & create the natural set of indices */ 1191 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1192 if (n) { 1193 (*ia)[0] = 0; 1194 for (j=1; j<bs; j++) { 1195 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1196 } 1197 } 1198 1199 for (i=1; i<n; i++) { 1200 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1201 for (j=1; j<bs; j++) { 1202 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1203 } 1204 } 1205 if (n) { 1206 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1207 } 1208 1209 if (ja) { 1210 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1211 cnt = 0; 1212 for (i=0; i<n; i++) { 1213 for (j=0; j<bs; j++) { 1214 for (k=tia[i]; k<tia[i+1]; k++) { 1215 for (l=0; l<bs; l++) { 1216 (*ja)[cnt++] = bs*tja[k] + l; 1217 } 1218 } 1219 } 1220 } 1221 } 1222 1223 n *= bs; 1224 nz *= bs*bs; 1225 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1226 ierr = PetscFree(tia);CHKERRQ(ierr); 1227 ierr = PetscFree(tja);CHKERRQ(ierr); 1228 } 1229 } else if (oshift == 1) { 1230 if (symmetric) { 1231 PetscInt nz = tia[A->rmap->n/bs]; 1232 /* add 1 to i and j indices */ 1233 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1234 *ia = tia; 1235 if (ja) { 1236 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1237 *ja = tja; 1238 } 1239 } else { 1240 PetscInt nz = a->i[A->rmap->n/bs]; 1241 /* malloc space and add 1 to i and j indices */ 1242 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1243 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1244 if (ja) { 1245 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1246 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1247 } 1248 } 1249 } else { 1250 *ia = tia; 1251 if (ja) *ja = tja; 1252 } 1253 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1259 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1260 { 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 if (!ia) PetscFunctionReturn(0); 1265 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1266 ierr = PetscFree(*ia);CHKERRQ(ierr); 1267 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1268 } 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1274 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1275 { 1276 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1277 PetscErrorCode ierr; 1278 1279 PetscFunctionBegin; 1280 #if defined(PETSC_USE_LOG) 1281 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1282 #endif 1283 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1284 if (a->row) { 1285 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1286 } 1287 if (a->col) { 1288 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1289 } 1290 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1291 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1292 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1293 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1294 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1295 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1296 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1297 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1298 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1299 1300 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1301 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1302 ierr = PetscFree(a);CHKERRQ(ierr); 1303 1304 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1318 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1319 { 1320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 switch (op) { 1325 case MAT_ROW_ORIENTED: 1326 a->roworiented = flg; 1327 break; 1328 case MAT_KEEP_NONZERO_PATTERN: 1329 a->keepnonzeropattern = flg; 1330 break; 1331 case MAT_NEW_NONZERO_LOCATIONS: 1332 a->nonew = (flg ? 0 : 1); 1333 break; 1334 case MAT_NEW_NONZERO_LOCATION_ERR: 1335 a->nonew = (flg ? -1 : 0); 1336 break; 1337 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1338 a->nonew = (flg ? -2 : 0); 1339 break; 1340 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1341 a->nounused = (flg ? -1 : 0); 1342 break; 1343 case MAT_NEW_DIAGONALS: 1344 case MAT_IGNORE_OFF_PROC_ENTRIES: 1345 case MAT_USE_HASH_TABLE: 1346 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1347 break; 1348 case MAT_SYMMETRIC: 1349 case MAT_STRUCTURALLY_SYMMETRIC: 1350 case MAT_HERMITIAN: 1351 case MAT_SYMMETRY_ETERNAL: 1352 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1353 break; 1354 default: 1355 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1356 } 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1362 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1363 { 1364 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1365 PetscErrorCode ierr; 1366 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1367 MatScalar *aa,*aa_i; 1368 PetscScalar *v_i; 1369 1370 PetscFunctionBegin; 1371 bs = A->rmap->bs; 1372 ai = a->i; 1373 aj = a->j; 1374 aa = a->a; 1375 bs2 = a->bs2; 1376 1377 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1378 1379 bn = row/bs; /* Block number */ 1380 bp = row % bs; /* Block Position */ 1381 M = ai[bn+1] - ai[bn]; 1382 *nz = bs*M; 1383 1384 if (v) { 1385 *v = 0; 1386 if (*nz) { 1387 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1388 for (i=0; i<M; i++) { /* for each block in the block row */ 1389 v_i = *v + i*bs; 1390 aa_i = aa + bs2*(ai[bn] + i); 1391 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1392 } 1393 } 1394 } 1395 1396 if (idx) { 1397 *idx = 0; 1398 if (*nz) { 1399 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1400 for (i=0; i<M; i++) { /* for each block in the block row */ 1401 idx_i = *idx + i*bs; 1402 itmp = bs*aj[ai[bn] + i]; 1403 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1404 } 1405 } 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1412 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1413 { 1414 PetscErrorCode ierr; 1415 1416 PetscFunctionBegin; 1417 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1418 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1424 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1425 { 1426 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1427 Mat C; 1428 PetscErrorCode ierr; 1429 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1430 PetscInt *rows,*cols,bs2=a->bs2; 1431 MatScalar *array; 1432 1433 PetscFunctionBegin; 1434 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1435 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1436 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1437 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1438 1439 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1440 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1441 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1442 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1443 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1444 ierr = PetscFree(col);CHKERRQ(ierr); 1445 } else { 1446 C = *B; 1447 } 1448 1449 array = a->a; 1450 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1451 cols = rows + bs; 1452 for (i=0; i<mbs; i++) { 1453 cols[0] = i*bs; 1454 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1455 len = ai[i+1] - ai[i]; 1456 for (j=0; j<len; j++) { 1457 rows[0] = (*aj++)*bs; 1458 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1459 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1460 array += bs2; 1461 } 1462 } 1463 ierr = PetscFree(rows);CHKERRQ(ierr); 1464 1465 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1466 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1467 1468 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1469 *B = C; 1470 } else { 1471 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1478 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1479 { 1480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1481 PetscErrorCode ierr; 1482 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1483 int fd; 1484 PetscScalar *aa; 1485 FILE *file; 1486 1487 PetscFunctionBegin; 1488 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1489 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1490 col_lens[0] = MAT_FILE_COOKIE; 1491 1492 col_lens[1] = A->rmap->N; 1493 col_lens[2] = A->cmap->n; 1494 col_lens[3] = a->nz*bs2; 1495 1496 /* store lengths of each row and write (including header) to file */ 1497 count = 0; 1498 for (i=0; i<a->mbs; i++) { 1499 for (j=0; j<bs; j++) { 1500 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1501 } 1502 } 1503 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1504 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1505 1506 /* store column indices (zero start index) */ 1507 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1508 count = 0; 1509 for (i=0; i<a->mbs; i++) { 1510 for (j=0; j<bs; j++) { 1511 for (k=a->i[i]; k<a->i[i+1]; k++) { 1512 for (l=0; l<bs; l++) { 1513 jj[count++] = bs*a->j[k] + l; 1514 } 1515 } 1516 } 1517 } 1518 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1519 ierr = PetscFree(jj);CHKERRQ(ierr); 1520 1521 /* store nonzero values */ 1522 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1523 count = 0; 1524 for (i=0; i<a->mbs; i++) { 1525 for (j=0; j<bs; j++) { 1526 for (k=a->i[i]; k<a->i[i+1]; k++) { 1527 for (l=0; l<bs; l++) { 1528 aa[count++] = a->a[bs2*k + l*bs + j]; 1529 } 1530 } 1531 } 1532 } 1533 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1534 ierr = PetscFree(aa);CHKERRQ(ierr); 1535 1536 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1537 if (file) { 1538 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1539 } 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1545 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1546 { 1547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1548 PetscErrorCode ierr; 1549 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1550 PetscViewerFormat format; 1551 1552 PetscFunctionBegin; 1553 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1554 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1555 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1556 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1557 Mat aij; 1558 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1559 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1560 ierr = MatDestroy(aij);CHKERRQ(ierr); 1561 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1562 PetscFunctionReturn(0); 1563 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1564 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1565 for (i=0; i<a->mbs; i++) { 1566 for (j=0; j<bs; j++) { 1567 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1568 for (k=a->i[i]; k<a->i[i+1]; k++) { 1569 for (l=0; l<bs; l++) { 1570 #if defined(PETSC_USE_COMPLEX) 1571 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1573 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1575 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1576 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1577 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1578 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1579 } 1580 #else 1581 if (a->a[bs2*k + l*bs + j] != 0.0) { 1582 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1583 } 1584 #endif 1585 } 1586 } 1587 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1588 } 1589 } 1590 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1591 } else { 1592 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1593 for (i=0; i<a->mbs; i++) { 1594 for (j=0; j<bs; j++) { 1595 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1596 for (k=a->i[i]; k<a->i[i+1]; k++) { 1597 for (l=0; l<bs; l++) { 1598 #if defined(PETSC_USE_COMPLEX) 1599 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1600 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1601 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1602 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1603 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1604 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1605 } else { 1606 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1607 } 1608 #else 1609 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1610 #endif 1611 } 1612 } 1613 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1614 } 1615 } 1616 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1617 } 1618 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1624 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1625 { 1626 Mat A = (Mat) Aa; 1627 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1628 PetscErrorCode ierr; 1629 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1630 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1631 MatScalar *aa; 1632 PetscViewer viewer; 1633 1634 PetscFunctionBegin; 1635 1636 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1637 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1638 1639 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1640 1641 /* loop over matrix elements drawing boxes */ 1642 color = PETSC_DRAW_BLUE; 1643 for (i=0,row=0; i<mbs; i++,row+=bs) { 1644 for (j=a->i[i]; j<a->i[i+1]; j++) { 1645 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1646 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1647 aa = a->a + j*bs2; 1648 for (k=0; k<bs; k++) { 1649 for (l=0; l<bs; l++) { 1650 if (PetscRealPart(*aa++) >= 0.) continue; 1651 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1652 } 1653 } 1654 } 1655 } 1656 color = PETSC_DRAW_CYAN; 1657 for (i=0,row=0; i<mbs; i++,row+=bs) { 1658 for (j=a->i[i]; j<a->i[i+1]; j++) { 1659 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1660 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1661 aa = a->a + j*bs2; 1662 for (k=0; k<bs; k++) { 1663 for (l=0; l<bs; l++) { 1664 if (PetscRealPart(*aa++) != 0.) continue; 1665 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1666 } 1667 } 1668 } 1669 } 1670 1671 color = PETSC_DRAW_RED; 1672 for (i=0,row=0; i<mbs; i++,row+=bs) { 1673 for (j=a->i[i]; j<a->i[i+1]; j++) { 1674 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1675 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1676 aa = a->a + j*bs2; 1677 for (k=0; k<bs; k++) { 1678 for (l=0; l<bs; l++) { 1679 if (PetscRealPart(*aa++) <= 0.) continue; 1680 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1681 } 1682 } 1683 } 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1690 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1691 { 1692 PetscErrorCode ierr; 1693 PetscReal xl,yl,xr,yr,w,h; 1694 PetscDraw draw; 1695 PetscTruth isnull; 1696 1697 PetscFunctionBegin; 1698 1699 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1700 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1701 1702 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1703 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1704 xr += w; yr += h; xl = -w; yl = -h; 1705 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1706 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1707 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatView_SeqBAIJ" 1713 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1714 { 1715 PetscErrorCode ierr; 1716 PetscTruth iascii,isbinary,isdraw; 1717 1718 PetscFunctionBegin; 1719 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1720 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1722 if (iascii){ 1723 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1724 } else if (isbinary) { 1725 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1726 } else if (isdraw) { 1727 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1728 } else { 1729 Mat B; 1730 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1731 ierr = MatView(B,viewer);CHKERRQ(ierr); 1732 ierr = MatDestroy(B);CHKERRQ(ierr); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1740 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1741 { 1742 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1743 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1744 PetscInt *ai = a->i,*ailen = a->ilen; 1745 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1746 MatScalar *ap,*aa = a->a; 1747 1748 PetscFunctionBegin; 1749 for (k=0; k<m; k++) { /* loop over rows */ 1750 row = im[k]; brow = row/bs; 1751 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1752 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1753 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1754 nrow = ailen[brow]; 1755 for (l=0; l<n; l++) { /* loop over columns */ 1756 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1757 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1758 col = in[l] ; 1759 bcol = col/bs; 1760 cidx = col%bs; 1761 ridx = row%bs; 1762 high = nrow; 1763 low = 0; /* assume unsorted */ 1764 while (high-low > 5) { 1765 t = (low+high)/2; 1766 if (rp[t] > bcol) high = t; 1767 else low = t; 1768 } 1769 for (i=low; i<high; i++) { 1770 if (rp[i] > bcol) break; 1771 if (rp[i] == bcol) { 1772 *v++ = ap[bs2*i+bs*cidx+ridx]; 1773 goto finished; 1774 } 1775 } 1776 *v++ = 0.0; 1777 finished:; 1778 } 1779 } 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #define CHUNKSIZE 10 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1786 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1787 { 1788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1789 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1790 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1791 PetscErrorCode ierr; 1792 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1793 PetscTruth roworiented=a->roworiented; 1794 const PetscScalar *value = v; 1795 MatScalar *ap,*aa = a->a,*bap; 1796 1797 PetscFunctionBegin; 1798 if (roworiented) { 1799 stepval = (n-1)*bs; 1800 } else { 1801 stepval = (m-1)*bs; 1802 } 1803 for (k=0; k<m; k++) { /* loop over added rows */ 1804 row = im[k]; 1805 if (row < 0) continue; 1806 #if defined(PETSC_USE_DEBUG) 1807 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1808 #endif 1809 rp = aj + ai[row]; 1810 ap = aa + bs2*ai[row]; 1811 rmax = imax[row]; 1812 nrow = ailen[row]; 1813 low = 0; 1814 high = nrow; 1815 for (l=0; l<n; l++) { /* loop over added columns */ 1816 if (in[l] < 0) continue; 1817 #if defined(PETSC_USE_DEBUG) 1818 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1819 #endif 1820 col = in[l]; 1821 if (roworiented) { 1822 value = v + k*(stepval+bs)*bs + l*bs; 1823 } else { 1824 value = v + l*(stepval+bs)*bs + k*bs; 1825 } 1826 if (col <= lastcol) low = 0; else high = nrow; 1827 lastcol = col; 1828 while (high-low > 7) { 1829 t = (low+high)/2; 1830 if (rp[t] > col) high = t; 1831 else low = t; 1832 } 1833 for (i=low; i<high; i++) { 1834 if (rp[i] > col) break; 1835 if (rp[i] == col) { 1836 bap = ap + bs2*i; 1837 if (roworiented) { 1838 if (is == ADD_VALUES) { 1839 for (ii=0; ii<bs; ii++,value+=stepval) { 1840 for (jj=ii; jj<bs2; jj+=bs) { 1841 bap[jj] += *value++; 1842 } 1843 } 1844 } else { 1845 for (ii=0; ii<bs; ii++,value+=stepval) { 1846 for (jj=ii; jj<bs2; jj+=bs) { 1847 bap[jj] = *value++; 1848 } 1849 } 1850 } 1851 } else { 1852 if (is == ADD_VALUES) { 1853 for (ii=0; ii<bs; ii++,value+=stepval) { 1854 for (jj=0; jj<bs; jj++) { 1855 *bap++ += *value++; 1856 } 1857 } 1858 } else { 1859 for (ii=0; ii<bs; ii++,value+=stepval) { 1860 for (jj=0; jj<bs; jj++) { 1861 *bap++ = *value++; 1862 } 1863 } 1864 } 1865 } 1866 goto noinsert2; 1867 } 1868 } 1869 if (nonew == 1) goto noinsert2; 1870 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1871 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1872 N = nrow++ - 1; high++; 1873 /* shift up all the later entries in this row */ 1874 for (ii=N; ii>=i; ii--) { 1875 rp[ii+1] = rp[ii]; 1876 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1877 } 1878 if (N >= i) { 1879 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1880 } 1881 rp[i] = col; 1882 bap = ap + bs2*i; 1883 if (roworiented) { 1884 for (ii=0; ii<bs; ii++,value+=stepval) { 1885 for (jj=ii; jj<bs2; jj+=bs) { 1886 bap[jj] = *value++; 1887 } 1888 } 1889 } else { 1890 for (ii=0; ii<bs; ii++,value+=stepval) { 1891 for (jj=0; jj<bs; jj++) { 1892 *bap++ = *value++; 1893 } 1894 } 1895 } 1896 noinsert2:; 1897 low = i; 1898 } 1899 ailen[row] = nrow; 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1906 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1907 { 1908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1909 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1910 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1911 PetscErrorCode ierr; 1912 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1913 MatScalar *aa = a->a,*ap; 1914 PetscReal ratio=0.6; 1915 1916 PetscFunctionBegin; 1917 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1918 1919 if (m) rmax = ailen[0]; 1920 for (i=1; i<mbs; i++) { 1921 /* move each row back by the amount of empty slots (fshift) before it*/ 1922 fshift += imax[i-1] - ailen[i-1]; 1923 rmax = PetscMax(rmax,ailen[i]); 1924 if (fshift) { 1925 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1926 N = ailen[i]; 1927 for (j=0; j<N; j++) { 1928 ip[j-fshift] = ip[j]; 1929 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1930 } 1931 } 1932 ai[i] = ai[i-1] + ailen[i-1]; 1933 } 1934 if (mbs) { 1935 fshift += imax[mbs-1] - ailen[mbs-1]; 1936 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1937 } 1938 /* reset ilen and imax for each row */ 1939 for (i=0; i<mbs; i++) { 1940 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1941 } 1942 a->nz = ai[mbs]; 1943 1944 /* diagonals may have moved, so kill the diagonal pointers */ 1945 a->idiagvalid = PETSC_FALSE; 1946 if (fshift && a->diag) { 1947 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1948 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1949 a->diag = 0; 1950 } 1951 if (fshift && a->nounused == -1) { 1952 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1953 } 1954 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1955 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1956 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1957 a->reallocs = 0; 1958 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1959 1960 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1961 if (a->compressedrow.use){ 1962 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1963 } 1964 1965 A->same_nonzero = PETSC_TRUE; 1966 PetscFunctionReturn(0); 1967 } 1968 1969 /* 1970 This function returns an array of flags which indicate the locations of contiguous 1971 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1972 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1973 Assume: sizes should be long enough to hold all the values. 1974 */ 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1977 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1978 { 1979 PetscInt i,j,k,row; 1980 PetscTruth flg; 1981 1982 PetscFunctionBegin; 1983 for (i=0,j=0; i<n; j++) { 1984 row = idx[i]; 1985 if (row%bs!=0) { /* Not the begining of a block */ 1986 sizes[j] = 1; 1987 i++; 1988 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1989 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1990 i++; 1991 } else { /* Begining of the block, so check if the complete block exists */ 1992 flg = PETSC_TRUE; 1993 for (k=1; k<bs; k++) { 1994 if (row+k != idx[i+k]) { /* break in the block */ 1995 flg = PETSC_FALSE; 1996 break; 1997 } 1998 } 1999 if (flg) { /* No break in the bs */ 2000 sizes[j] = bs; 2001 i+= bs; 2002 } else { 2003 sizes[j] = 1; 2004 i++; 2005 } 2006 } 2007 } 2008 *bs_max = j; 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNCT__ 2013 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2014 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2015 { 2016 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2017 PetscErrorCode ierr; 2018 PetscInt i,j,k,count,*rows; 2019 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2020 PetscScalar zero = 0.0; 2021 MatScalar *aa; 2022 2023 PetscFunctionBegin; 2024 /* Make a copy of the IS and sort it */ 2025 /* allocate memory for rows,sizes */ 2026 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2027 sizes = rows + is_n; 2028 2029 /* copy IS values to rows, and sort them */ 2030 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2031 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2032 if (baij->keepnonzeropattern) { 2033 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2034 bs_max = is_n; 2035 A->same_nonzero = PETSC_TRUE; 2036 } else { 2037 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2038 A->same_nonzero = PETSC_FALSE; 2039 } 2040 2041 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2042 row = rows[j]; 2043 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2044 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2045 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2046 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2047 if (diag != 0.0) { 2048 if (baij->ilen[row/bs] > 0) { 2049 baij->ilen[row/bs] = 1; 2050 baij->j[baij->i[row/bs]] = row/bs; 2051 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2052 } 2053 /* Now insert all the diagonal values for this bs */ 2054 for (k=0; k<bs; k++) { 2055 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2056 } 2057 } else { /* (diag == 0.0) */ 2058 baij->ilen[row/bs] = 0; 2059 } /* end (diag == 0.0) */ 2060 } else { /* (sizes[i] != bs) */ 2061 #if defined (PETSC_USE_DEBUG) 2062 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2063 #endif 2064 for (k=0; k<count; k++) { 2065 aa[0] = zero; 2066 aa += bs; 2067 } 2068 if (diag != 0.0) { 2069 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2070 } 2071 } 2072 } 2073 2074 ierr = PetscFree(rows);CHKERRQ(ierr); 2075 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2081 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2082 { 2083 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2084 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2085 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2086 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2087 PetscErrorCode ierr; 2088 PetscInt ridx,cidx,bs2=a->bs2; 2089 PetscTruth roworiented=a->roworiented; 2090 MatScalar *ap,value,*aa=a->a,*bap; 2091 2092 PetscFunctionBegin; 2093 for (k=0; k<m; k++) { /* loop over added rows */ 2094 row = im[k]; 2095 brow = row/bs; 2096 if (row < 0) continue; 2097 #if defined(PETSC_USE_DEBUG) 2098 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2099 #endif 2100 rp = aj + ai[brow]; 2101 ap = aa + bs2*ai[brow]; 2102 rmax = imax[brow]; 2103 nrow = ailen[brow]; 2104 low = 0; 2105 high = nrow; 2106 for (l=0; l<n; l++) { /* loop over added columns */ 2107 if (in[l] < 0) continue; 2108 #if defined(PETSC_USE_DEBUG) 2109 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2110 #endif 2111 col = in[l]; bcol = col/bs; 2112 ridx = row % bs; cidx = col % bs; 2113 if (roworiented) { 2114 value = v[l + k*n]; 2115 } else { 2116 value = v[k + l*m]; 2117 } 2118 if (col <= lastcol) low = 0; else high = nrow; 2119 lastcol = col; 2120 while (high-low > 7) { 2121 t = (low+high)/2; 2122 if (rp[t] > bcol) high = t; 2123 else low = t; 2124 } 2125 for (i=low; i<high; i++) { 2126 if (rp[i] > bcol) break; 2127 if (rp[i] == bcol) { 2128 bap = ap + bs2*i + bs*cidx + ridx; 2129 if (is == ADD_VALUES) *bap += value; 2130 else *bap = value; 2131 goto noinsert1; 2132 } 2133 } 2134 if (nonew == 1) goto noinsert1; 2135 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2136 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2137 N = nrow++ - 1; high++; 2138 /* shift up all the later entries in this row */ 2139 for (ii=N; ii>=i; ii--) { 2140 rp[ii+1] = rp[ii]; 2141 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2142 } 2143 if (N>=i) { 2144 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2145 } 2146 rp[i] = bcol; 2147 ap[bs2*i + bs*cidx + ridx] = value; 2148 a->nz++; 2149 noinsert1:; 2150 low = i; 2151 } 2152 ailen[brow] = nrow; 2153 } 2154 A->same_nonzero = PETSC_FALSE; 2155 PetscFunctionReturn(0); 2156 } 2157 2158 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2162 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2163 { 2164 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2165 Mat outA; 2166 PetscErrorCode ierr; 2167 PetscTruth row_identity,col_identity; 2168 2169 PetscFunctionBegin; 2170 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2171 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2172 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2173 if (!row_identity || !col_identity) { 2174 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2175 } 2176 2177 outA = inA; 2178 inA->factor = MAT_FACTOR_LU; 2179 2180 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2181 2182 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2183 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2184 a->row = row; 2185 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2186 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2187 a->col = col; 2188 2189 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2190 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2191 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2192 2193 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2194 if (!a->solve_work) { 2195 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2196 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2197 } 2198 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2199 2200 PetscFunctionReturn(0); 2201 } 2202 2203 EXTERN_C_BEGIN 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2206 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2207 { 2208 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2209 PetscInt i,nz,mbs; 2210 2211 PetscFunctionBegin; 2212 nz = baij->maxnz/baij->bs2; 2213 mbs = baij->mbs; 2214 for (i=0; i<nz; i++) { 2215 baij->j[i] = indices[i]; 2216 } 2217 baij->nz = nz; 2218 for (i=0; i<mbs; i++) { 2219 baij->ilen[i] = baij->imax[i]; 2220 } 2221 PetscFunctionReturn(0); 2222 } 2223 EXTERN_C_END 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2227 /*@ 2228 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2229 in the matrix. 2230 2231 Input Parameters: 2232 + mat - the SeqBAIJ matrix 2233 - indices - the column indices 2234 2235 Level: advanced 2236 2237 Notes: 2238 This can be called if you have precomputed the nonzero structure of the 2239 matrix and want to provide it to the matrix object to improve the performance 2240 of the MatSetValues() operation. 2241 2242 You MUST have set the correct numbers of nonzeros per row in the call to 2243 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2244 2245 MUST be called before any calls to MatSetValues(); 2246 2247 @*/ 2248 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2249 { 2250 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2251 2252 PetscFunctionBegin; 2253 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2254 PetscValidPointer(indices,2); 2255 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2256 if (f) { 2257 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2258 } else { 2259 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2266 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2267 { 2268 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2269 PetscErrorCode ierr; 2270 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2271 PetscReal atmp; 2272 PetscScalar *x,zero = 0.0; 2273 MatScalar *aa; 2274 PetscInt ncols,brow,krow,kcol; 2275 2276 PetscFunctionBegin; 2277 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2278 bs = A->rmap->bs; 2279 aa = a->a; 2280 ai = a->i; 2281 aj = a->j; 2282 mbs = a->mbs; 2283 2284 ierr = VecSet(v,zero);CHKERRQ(ierr); 2285 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2286 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2287 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2288 for (i=0; i<mbs; i++) { 2289 ncols = ai[1] - ai[0]; ai++; 2290 brow = bs*i; 2291 for (j=0; j<ncols; j++){ 2292 for (kcol=0; kcol<bs; kcol++){ 2293 for (krow=0; krow<bs; krow++){ 2294 atmp = PetscAbsScalar(*aa);aa++; 2295 row = brow + krow; /* row index */ 2296 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2297 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2298 } 2299 } 2300 aj++; 2301 } 2302 } 2303 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatCopy_SeqBAIJ" 2309 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2310 { 2311 PetscErrorCode ierr; 2312 2313 PetscFunctionBegin; 2314 /* If the two matrices have the same copy implementation, use fast copy. */ 2315 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2317 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2318 2319 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2320 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2321 } 2322 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2323 } else { 2324 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2325 } 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2331 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2332 { 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2342 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2343 { 2344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2345 PetscFunctionBegin; 2346 *array = a->a; 2347 PetscFunctionReturn(0); 2348 } 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2352 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2353 { 2354 PetscFunctionBegin; 2355 PetscFunctionReturn(0); 2356 } 2357 2358 #include "petscblaslapack.h" 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2361 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2362 { 2363 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2364 PetscErrorCode ierr; 2365 PetscInt i,bs=Y->rmap->bs,j,bs2; 2366 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2367 2368 PetscFunctionBegin; 2369 if (str == SAME_NONZERO_PATTERN) { 2370 PetscScalar alpha = a; 2371 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2372 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2373 if (y->xtoy && y->XtoY != X) { 2374 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2375 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2376 } 2377 if (!y->xtoy) { /* get xtoy */ 2378 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2379 y->XtoY = X; 2380 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2381 } 2382 bs2 = bs*bs; 2383 for (i=0; i<x->nz; i++) { 2384 j = 0; 2385 while (j < bs2){ 2386 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2387 j++; 2388 } 2389 } 2390 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2391 } else { 2392 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2393 } 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2399 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2400 { 2401 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2402 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2403 MatScalar *aa = a->a; 2404 2405 PetscFunctionBegin; 2406 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2412 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2413 { 2414 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2415 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2416 MatScalar *aa = a->a; 2417 2418 PetscFunctionBegin; 2419 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2427 /* 2428 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2429 */ 2430 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2431 { 2432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2433 PetscErrorCode ierr; 2434 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2435 PetscInt nz = a->i[m],row,*jj,mr,col; 2436 2437 PetscFunctionBegin; 2438 *nn = n; 2439 if (!ia) PetscFunctionReturn(0); 2440 if (symmetric) { 2441 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2442 } else { 2443 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2444 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2445 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2446 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2447 jj = a->j; 2448 for (i=0; i<nz; i++) { 2449 collengths[jj[i]]++; 2450 } 2451 cia[0] = oshift; 2452 for (i=0; i<n; i++) { 2453 cia[i+1] = cia[i] + collengths[i]; 2454 } 2455 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2456 jj = a->j; 2457 for (row=0; row<m; row++) { 2458 mr = a->i[row+1] - a->i[row]; 2459 for (i=0; i<mr; i++) { 2460 col = *jj++; 2461 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2462 } 2463 } 2464 ierr = PetscFree(collengths);CHKERRQ(ierr); 2465 *ia = cia; *ja = cja; 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2472 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2473 { 2474 PetscErrorCode ierr; 2475 2476 PetscFunctionBegin; 2477 if (!ia) PetscFunctionReturn(0); 2478 ierr = PetscFree(*ia);CHKERRQ(ierr); 2479 ierr = PetscFree(*ja);CHKERRQ(ierr); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2485 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2486 { 2487 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2488 PetscErrorCode ierr; 2489 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2490 PetscScalar dx,*y,*xx,*w3_array; 2491 PetscScalar *vscale_array; 2492 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2493 Vec w1=coloring->w1,w2=coloring->w2,w3; 2494 void *fctx = coloring->fctx; 2495 PetscTruth flg = PETSC_FALSE; 2496 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2497 Vec x1_tmp; 2498 2499 PetscFunctionBegin; 2500 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2501 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2502 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2503 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2504 2505 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2506 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2507 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2508 if (flg) { 2509 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2510 } else { 2511 PetscTruth assembled; 2512 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2513 if (assembled) { 2514 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2515 } 2516 } 2517 2518 x1_tmp = x1; 2519 if (!coloring->vscale){ 2520 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2521 } 2522 2523 /* 2524 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2525 coloring->F for the coarser grids from the finest 2526 */ 2527 if (coloring->F) { 2528 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2529 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2530 if (m1 != m2) { 2531 coloring->F = 0; 2532 } 2533 } 2534 2535 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2536 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2537 } 2538 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2539 2540 /* Set w1 = F(x1) */ 2541 if (coloring->F) { 2542 w1 = coloring->F; /* use already computed value of function */ 2543 coloring->F = 0; 2544 } else { 2545 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2546 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2547 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2548 } 2549 2550 if (!coloring->w3) { 2551 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2552 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2553 } 2554 w3 = coloring->w3; 2555 2556 CHKMEMQ; 2557 /* Compute all the local scale factors, including ghost points */ 2558 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2559 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2560 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2561 if (ctype == IS_COLORING_GHOSTED){ 2562 col_start = 0; col_end = N; 2563 } else if (ctype == IS_COLORING_GLOBAL){ 2564 xx = xx - start; 2565 vscale_array = vscale_array - start; 2566 col_start = start; col_end = N + start; 2567 } CHKMEMQ; 2568 for (col=col_start; col<col_end; col++){ 2569 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2570 if (coloring->htype[0] == 'w') { 2571 dx = 1.0 + unorm; 2572 } else { 2573 dx = xx[col]; 2574 } 2575 if (dx == 0.0) dx = 1.0; 2576 #if !defined(PETSC_USE_COMPLEX) 2577 if (dx < umin && dx >= 0.0) dx = umin; 2578 else if (dx < 0.0 && dx > -umin) dx = -umin; 2579 #else 2580 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2581 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2582 #endif 2583 dx *= epsilon; 2584 vscale_array[col] = 1.0/dx; 2585 } CHKMEMQ; 2586 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2587 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2588 if (ctype == IS_COLORING_GLOBAL){ 2589 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2590 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2591 } 2592 CHKMEMQ; 2593 if (coloring->vscaleforrow) { 2594 vscaleforrow = coloring->vscaleforrow; 2595 } else { 2596 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2597 } 2598 2599 2600 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2601 /* 2602 Loop over each color 2603 */ 2604 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2605 for (k=0; k<coloring->ncolors; k++) { 2606 coloring->currentcolor = k; 2607 for (i=0; i<bs; i++) { 2608 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2609 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2610 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2611 /* 2612 Loop over each column associated with color 2613 adding the perturbation to the vector w3. 2614 */ 2615 for (l=0; l<coloring->ncolumns[k]; l++) { 2616 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2617 if (coloring->htype[0] == 'w') { 2618 dx = 1.0 + unorm; 2619 } else { 2620 dx = xx[col]; 2621 } 2622 if (dx == 0.0) dx = 1.0; 2623 #if !defined(PETSC_USE_COMPLEX) 2624 if (dx < umin && dx >= 0.0) dx = umin; 2625 else if (dx < 0.0 && dx > -umin) dx = -umin; 2626 #else 2627 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2628 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2629 #endif 2630 dx *= epsilon; 2631 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2632 w3_array[col] += dx; 2633 } 2634 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2635 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2636 2637 /* 2638 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2639 w2 = F(x1 + dx) - F(x1) 2640 */ 2641 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2642 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2643 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2644 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2645 2646 /* 2647 Loop over rows of vector, putting results into Jacobian matrix 2648 */ 2649 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2650 for (l=0; l<coloring->nrows[k]; l++) { 2651 row = bs*coloring->rows[k][l]; /* local row index */ 2652 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2653 for (j=0; j<bs; j++) { 2654 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2655 srows[j] = row + start + j; 2656 } 2657 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2658 } 2659 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2660 } 2661 } /* endof for each color */ 2662 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2663 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2664 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2665 ierr = PetscFree(srows);CHKERRQ(ierr); 2666 2667 coloring->currentcolor = -1; 2668 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2669 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2670 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2671 PetscFunctionReturn(0); 2672 } 2673 2674 /* -------------------------------------------------------------------*/ 2675 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2676 MatGetRow_SeqBAIJ, 2677 MatRestoreRow_SeqBAIJ, 2678 MatMult_SeqBAIJ_N, 2679 /* 4*/ MatMultAdd_SeqBAIJ_N, 2680 MatMultTranspose_SeqBAIJ, 2681 MatMultTransposeAdd_SeqBAIJ, 2682 0, 2683 0, 2684 0, 2685 /*10*/ 0, 2686 MatLUFactor_SeqBAIJ, 2687 0, 2688 0, 2689 MatTranspose_SeqBAIJ, 2690 /*15*/ MatGetInfo_SeqBAIJ, 2691 MatEqual_SeqBAIJ, 2692 MatGetDiagonal_SeqBAIJ, 2693 MatDiagonalScale_SeqBAIJ, 2694 MatNorm_SeqBAIJ, 2695 /*20*/ 0, 2696 MatAssemblyEnd_SeqBAIJ, 2697 MatSetOption_SeqBAIJ, 2698 MatZeroEntries_SeqBAIJ, 2699 /*24*/ MatZeroRows_SeqBAIJ, 2700 0, 2701 0, 2702 0, 2703 0, 2704 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2705 0, 2706 0, 2707 MatGetArray_SeqBAIJ, 2708 MatRestoreArray_SeqBAIJ, 2709 /*34*/ MatDuplicate_SeqBAIJ, 2710 0, 2711 0, 2712 MatILUFactor_SeqBAIJ, 2713 0, 2714 /*39*/ MatAXPY_SeqBAIJ, 2715 MatGetSubMatrices_SeqBAIJ, 2716 MatIncreaseOverlap_SeqBAIJ, 2717 MatGetValues_SeqBAIJ, 2718 MatCopy_SeqBAIJ, 2719 /*44*/ 0, 2720 MatScale_SeqBAIJ, 2721 0, 2722 0, 2723 MatILUDTFactor_SeqBAIJ, 2724 /*49*/ 0, 2725 MatGetRowIJ_SeqBAIJ, 2726 MatRestoreRowIJ_SeqBAIJ, 2727 MatGetColumnIJ_SeqBAIJ, 2728 MatRestoreColumnIJ_SeqBAIJ, 2729 /*54*/ MatFDColoringCreate_SeqAIJ, 2730 0, 2731 0, 2732 0, 2733 MatSetValuesBlocked_SeqBAIJ, 2734 /*59*/ MatGetSubMatrix_SeqBAIJ, 2735 MatDestroy_SeqBAIJ, 2736 MatView_SeqBAIJ, 2737 0, 2738 0, 2739 /*64*/ 0, 2740 0, 2741 0, 2742 0, 2743 0, 2744 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2745 0, 2746 MatConvert_Basic, 2747 0, 2748 0, 2749 /*74*/ 0, 2750 MatFDColoringApply_BAIJ, 2751 0, 2752 0, 2753 0, 2754 /*79*/ 0, 2755 0, 2756 0, 2757 0, 2758 MatLoad_SeqBAIJ, 2759 /*84*/ 0, 2760 0, 2761 0, 2762 0, 2763 0, 2764 /*89*/ 0, 2765 0, 2766 0, 2767 0, 2768 0, 2769 /*94*/ 0, 2770 0, 2771 0, 2772 0, 2773 0, 2774 /*99*/0, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*104*/0, 2780 MatRealPart_SeqBAIJ, 2781 MatImaginaryPart_SeqBAIJ, 2782 0, 2783 0, 2784 /*109*/0, 2785 0, 2786 0, 2787 0, 2788 MatMissingDiagonal_SeqBAIJ 2789 /*114*/ 2790 }; 2791 2792 EXTERN_C_BEGIN 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2795 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2796 { 2797 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2798 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2799 PetscErrorCode ierr; 2800 2801 PetscFunctionBegin; 2802 if (aij->nonew != 1) { 2803 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2804 } 2805 2806 /* allocate space for values if not already there */ 2807 if (!aij->saved_values) { 2808 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2809 } 2810 2811 /* copy values over */ 2812 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2813 PetscFunctionReturn(0); 2814 } 2815 EXTERN_C_END 2816 2817 EXTERN_C_BEGIN 2818 #undef __FUNCT__ 2819 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2820 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2821 { 2822 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2823 PetscErrorCode ierr; 2824 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2825 2826 PetscFunctionBegin; 2827 if (aij->nonew != 1) { 2828 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2829 } 2830 if (!aij->saved_values) { 2831 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2832 } 2833 2834 /* copy values over */ 2835 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2836 PetscFunctionReturn(0); 2837 } 2838 EXTERN_C_END 2839 2840 EXTERN_C_BEGIN 2841 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2842 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2843 EXTERN_C_END 2844 2845 EXTERN_C_BEGIN 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2848 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2849 { 2850 Mat_SeqBAIJ *b; 2851 PetscErrorCode ierr; 2852 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2853 PetscTruth flg,skipallocation = PETSC_FALSE; 2854 2855 PetscFunctionBegin; 2856 2857 if (nz == MAT_SKIP_ALLOCATION) { 2858 skipallocation = PETSC_TRUE; 2859 nz = 0; 2860 } 2861 2862 if (bs < 0) { 2863 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2864 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2865 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2866 bs = PetscAbs(bs); 2867 } 2868 if (nnz && newbs != bs) { 2869 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2870 } 2871 bs = newbs; 2872 2873 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2874 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2875 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2876 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2877 2878 B->preallocated = PETSC_TRUE; 2879 2880 mbs = B->rmap->n/bs; 2881 nbs = B->cmap->n/bs; 2882 bs2 = bs*bs; 2883 2884 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2885 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2886 } 2887 2888 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2889 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2890 if (nnz) { 2891 for (i=0; i<mbs; i++) { 2892 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2893 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2894 } 2895 } 2896 2897 b = (Mat_SeqBAIJ*)B->data; 2898 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2899 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2900 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2901 2902 if (!flg) { 2903 switch (bs) { 2904 case 1: 2905 B->ops->mult = MatMult_SeqBAIJ_1; 2906 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2907 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2908 break; 2909 case 2: 2910 B->ops->mult = MatMult_SeqBAIJ_2; 2911 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2912 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2913 break; 2914 case 3: 2915 B->ops->mult = MatMult_SeqBAIJ_3; 2916 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2917 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2918 break; 2919 case 4: 2920 B->ops->mult = MatMult_SeqBAIJ_4; 2921 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2922 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2923 break; 2924 case 5: 2925 B->ops->mult = MatMult_SeqBAIJ_5; 2926 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2927 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2928 break; 2929 case 6: 2930 B->ops->mult = MatMult_SeqBAIJ_6; 2931 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2932 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2933 break; 2934 case 7: 2935 B->ops->mult = MatMult_SeqBAIJ_7; 2936 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2937 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2938 break; 2939 default: 2940 B->ops->mult = MatMult_SeqBAIJ_N; 2941 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2942 break; 2943 } 2944 } 2945 B->rmap->bs = bs; 2946 b->mbs = mbs; 2947 b->nbs = nbs; 2948 if (!skipallocation) { 2949 if (!b->imax) { 2950 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2951 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2952 b->free_imax_ilen = PETSC_TRUE; 2953 } 2954 /* b->ilen will count nonzeros in each block row so far. */ 2955 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2956 if (!nnz) { 2957 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2958 else if (nz <= 0) nz = 1; 2959 for (i=0; i<mbs; i++) b->imax[i] = nz; 2960 nz = nz*mbs; 2961 } else { 2962 nz = 0; 2963 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2964 } 2965 2966 /* allocate the matrix space */ 2967 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2968 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2969 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2970 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2971 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2972 b->singlemalloc = PETSC_TRUE; 2973 b->i[0] = 0; 2974 for (i=1; i<mbs+1; i++) { 2975 b->i[i] = b->i[i-1] + b->imax[i-1]; 2976 } 2977 b->free_a = PETSC_TRUE; 2978 b->free_ij = PETSC_TRUE; 2979 } else { 2980 b->free_a = PETSC_FALSE; 2981 b->free_ij = PETSC_FALSE; 2982 } 2983 2984 B->rmap->bs = bs; 2985 b->bs2 = bs2; 2986 b->mbs = mbs; 2987 b->nz = 0; 2988 b->maxnz = nz*bs2; 2989 B->info.nz_unneeded = (PetscReal)b->maxnz; 2990 PetscFunctionReturn(0); 2991 } 2992 EXTERN_C_END 2993 2994 EXTERN_C_BEGIN 2995 #undef __FUNCT__ 2996 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2997 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2998 { 2999 PetscInt i,m,nz,nz_max=0,*nnz; 3000 PetscScalar *values=0; 3001 PetscErrorCode ierr; 3002 3003 PetscFunctionBegin; 3004 3005 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3006 3007 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3008 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3009 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3010 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3011 m = B->rmap->n/bs; 3012 3013 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3014 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3015 for(i=0; i<m; i++) { 3016 nz = ii[i+1]- ii[i]; 3017 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3018 nz_max = PetscMax(nz_max, nz); 3019 nnz[i] = nz; 3020 } 3021 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3022 ierr = PetscFree(nnz);CHKERRQ(ierr); 3023 3024 values = (PetscScalar*)V; 3025 if (!values) { 3026 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3027 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3028 } 3029 for (i=0; i<m; i++) { 3030 PetscInt ncols = ii[i+1] - ii[i]; 3031 const PetscInt *icols = jj + ii[i]; 3032 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3033 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3034 } 3035 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3036 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3037 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3038 3039 PetscFunctionReturn(0); 3040 } 3041 EXTERN_C_END 3042 3043 3044 EXTERN_C_BEGIN 3045 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3046 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3047 EXTERN_C_END 3048 3049 /*MC 3050 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3051 block sparse compressed row format. 3052 3053 Options Database Keys: 3054 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3055 3056 Level: beginner 3057 3058 .seealso: MatCreateSeqBAIJ() 3059 M*/ 3060 3061 3062 EXTERN_C_BEGIN 3063 #undef __FUNCT__ 3064 #define __FUNCT__ "MatCreate_SeqBAIJ" 3065 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3066 { 3067 PetscErrorCode ierr; 3068 PetscMPIInt size; 3069 Mat_SeqBAIJ *b; 3070 3071 PetscFunctionBegin; 3072 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3073 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3074 3075 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3076 B->data = (void*)b; 3077 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3078 B->mapping = 0; 3079 b->row = 0; 3080 b->col = 0; 3081 b->icol = 0; 3082 b->reallocs = 0; 3083 b->saved_values = 0; 3084 3085 b->roworiented = PETSC_TRUE; 3086 b->nonew = 0; 3087 b->diag = 0; 3088 b->solve_work = 0; 3089 b->mult_work = 0; 3090 B->spptr = 0; 3091 B->info.nz_unneeded = (PetscReal)b->maxnz; 3092 b->keepnonzeropattern = PETSC_FALSE; 3093 b->xtoy = 0; 3094 b->XtoY = 0; 3095 b->compressedrow.use = PETSC_FALSE; 3096 b->compressedrow.nrows = 0; 3097 b->compressedrow.i = PETSC_NULL; 3098 b->compressedrow.rindex = PETSC_NULL; 3099 b->compressedrow.checked = PETSC_FALSE; 3100 B->same_nonzero = PETSC_FALSE; 3101 3102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 3103 "MatGetFactorAvailable_seqbaij_petsc", 3104 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 3106 "MatGetFactor_seqbaij_petsc", 3107 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3109 "MatInvertBlockDiagonal_SeqBAIJ", 3110 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3112 "MatStoreValues_SeqBAIJ", 3113 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3115 "MatRetrieveValues_SeqBAIJ", 3116 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3117 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3118 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3119 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3120 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3121 "MatConvert_SeqBAIJ_SeqAIJ", 3122 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3123 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3124 "MatConvert_SeqBAIJ_SeqSBAIJ", 3125 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3126 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3127 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3128 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3129 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3130 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3131 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3132 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3133 PetscFunctionReturn(0); 3134 } 3135 EXTERN_C_END 3136 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3139 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3140 { 3141 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3142 PetscErrorCode ierr; 3143 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3144 3145 PetscFunctionBegin; 3146 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3147 3148 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3149 c->imax = a->imax; 3150 c->ilen = a->ilen; 3151 c->free_imax_ilen = PETSC_FALSE; 3152 } else { 3153 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3154 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3155 for (i=0; i<mbs; i++) { 3156 c->imax[i] = a->imax[i]; 3157 c->ilen[i] = a->ilen[i]; 3158 } 3159 c->free_imax_ilen = PETSC_TRUE; 3160 } 3161 3162 /* allocate the matrix space */ 3163 if (mallocmatspace){ 3164 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3165 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3166 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3167 c->singlemalloc = PETSC_FALSE; 3168 c->free_ij = PETSC_FALSE; 3169 c->i = a->i; 3170 c->j = a->j; 3171 c->parent = A; 3172 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3173 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3174 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3175 } else { 3176 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3177 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3178 c->singlemalloc = PETSC_TRUE; 3179 c->free_ij = PETSC_TRUE; 3180 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3181 if (mbs > 0) { 3182 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3183 if (cpvalues == MAT_COPY_VALUES) { 3184 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3185 } else { 3186 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3187 } 3188 } 3189 } 3190 } 3191 3192 c->roworiented = a->roworiented; 3193 c->nonew = a->nonew; 3194 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 3195 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 3196 c->bs2 = a->bs2; 3197 c->mbs = a->mbs; 3198 c->nbs = a->nbs; 3199 3200 if (a->diag) { 3201 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3202 c->diag = a->diag; 3203 c->free_diag = PETSC_FALSE; 3204 } else { 3205 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3206 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3207 for (i=0; i<mbs; i++) { 3208 c->diag[i] = a->diag[i]; 3209 } 3210 c->free_diag = PETSC_TRUE; 3211 } 3212 } else c->diag = 0; 3213 c->nz = a->nz; 3214 c->maxnz = a->maxnz; 3215 c->solve_work = 0; 3216 c->mult_work = 0; 3217 c->free_a = PETSC_TRUE; 3218 c->free_ij = PETSC_TRUE; 3219 C->preallocated = PETSC_TRUE; 3220 C->assembled = PETSC_TRUE; 3221 3222 c->compressedrow.use = a->compressedrow.use; 3223 c->compressedrow.nrows = a->compressedrow.nrows; 3224 c->compressedrow.checked = a->compressedrow.checked; 3225 if ( a->compressedrow.checked && a->compressedrow.use){ 3226 i = a->compressedrow.nrows; 3227 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 3228 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3229 c->compressedrow.rindex = c->compressedrow.i + i + 1; 3230 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3231 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3232 } else { 3233 c->compressedrow.use = PETSC_FALSE; 3234 c->compressedrow.i = PETSC_NULL; 3235 c->compressedrow.rindex = PETSC_NULL; 3236 } 3237 C->same_nonzero = A->same_nonzero; 3238 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3239 PetscFunctionReturn(0); 3240 } 3241 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3244 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3245 { 3246 PetscErrorCode ierr; 3247 3248 PetscFunctionBegin; 3249 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3250 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3251 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3252 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3253 PetscFunctionReturn(0); 3254 } 3255 3256 #undef __FUNCT__ 3257 #define __FUNCT__ "MatLoad_SeqBAIJ" 3258 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3259 { 3260 Mat_SeqBAIJ *a; 3261 Mat B; 3262 PetscErrorCode ierr; 3263 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3264 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3265 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3266 PetscInt *masked,nmask,tmp,bs2,ishift; 3267 PetscMPIInt size; 3268 int fd; 3269 PetscScalar *aa; 3270 MPI_Comm comm = ((PetscObject)viewer)->comm; 3271 3272 PetscFunctionBegin; 3273 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3274 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3275 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3276 bs2 = bs*bs; 3277 3278 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3279 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3280 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3281 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3282 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3283 M = header[1]; N = header[2]; nz = header[3]; 3284 3285 if (header[3] < 0) { 3286 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3287 } 3288 3289 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3290 3291 /* 3292 This code adds extra rows to make sure the number of rows is 3293 divisible by the blocksize 3294 */ 3295 mbs = M/bs; 3296 extra_rows = bs - M + bs*(mbs); 3297 if (extra_rows == bs) extra_rows = 0; 3298 else mbs++; 3299 if (extra_rows) { 3300 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3301 } 3302 3303 /* read in row lengths */ 3304 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3305 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3306 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3307 3308 /* read in column indices */ 3309 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3310 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3311 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3312 3313 /* loop over row lengths determining block row lengths */ 3314 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3315 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3316 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3317 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3318 masked = mask + mbs; 3319 rowcount = 0; nzcount = 0; 3320 for (i=0; i<mbs; i++) { 3321 nmask = 0; 3322 for (j=0; j<bs; j++) { 3323 kmax = rowlengths[rowcount]; 3324 for (k=0; k<kmax; k++) { 3325 tmp = jj[nzcount++]/bs; 3326 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3327 } 3328 rowcount++; 3329 } 3330 browlengths[i] += nmask; 3331 /* zero out the mask elements we set */ 3332 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3333 } 3334 3335 /* create our matrix */ 3336 ierr = MatCreate(comm,&B); 3337 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3338 ierr = MatSetType(B,type);CHKERRQ(ierr); 3339 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3340 a = (Mat_SeqBAIJ*)B->data; 3341 3342 /* set matrix "i" values */ 3343 a->i[0] = 0; 3344 for (i=1; i<= mbs; i++) { 3345 a->i[i] = a->i[i-1] + browlengths[i-1]; 3346 a->ilen[i-1] = browlengths[i-1]; 3347 } 3348 a->nz = 0; 3349 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3350 3351 /* read in nonzero values */ 3352 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3353 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3354 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3355 3356 /* set "a" and "j" values into matrix */ 3357 nzcount = 0; jcount = 0; 3358 for (i=0; i<mbs; i++) { 3359 nzcountb = nzcount; 3360 nmask = 0; 3361 for (j=0; j<bs; j++) { 3362 kmax = rowlengths[i*bs+j]; 3363 for (k=0; k<kmax; k++) { 3364 tmp = jj[nzcount++]/bs; 3365 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3366 } 3367 } 3368 /* sort the masked values */ 3369 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3370 3371 /* set "j" values into matrix */ 3372 maskcount = 1; 3373 for (j=0; j<nmask; j++) { 3374 a->j[jcount++] = masked[j]; 3375 mask[masked[j]] = maskcount++; 3376 } 3377 /* set "a" values into matrix */ 3378 ishift = bs2*a->i[i]; 3379 for (j=0; j<bs; j++) { 3380 kmax = rowlengths[i*bs+j]; 3381 for (k=0; k<kmax; k++) { 3382 tmp = jj[nzcountb]/bs ; 3383 block = mask[tmp] - 1; 3384 point = jj[nzcountb] - bs*tmp; 3385 idx = ishift + bs2*block + j + bs*point; 3386 a->a[idx] = (MatScalar)aa[nzcountb++]; 3387 } 3388 } 3389 /* zero out the mask elements we set */ 3390 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3391 } 3392 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3393 3394 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3395 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3396 ierr = PetscFree(aa);CHKERRQ(ierr); 3397 ierr = PetscFree(jj);CHKERRQ(ierr); 3398 ierr = PetscFree(mask);CHKERRQ(ierr); 3399 3400 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3401 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3402 ierr = MatView_Private(B);CHKERRQ(ierr); 3403 3404 *A = B; 3405 PetscFunctionReturn(0); 3406 } 3407 3408 #undef __FUNCT__ 3409 #define __FUNCT__ "MatCreateSeqBAIJ" 3410 /*@C 3411 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3412 compressed row) format. For good matrix assembly performance the 3413 user should preallocate the matrix storage by setting the parameter nz 3414 (or the array nnz). By setting these parameters accurately, performance 3415 during matrix assembly can be increased by more than a factor of 50. 3416 3417 Collective on MPI_Comm 3418 3419 Input Parameters: 3420 + comm - MPI communicator, set to PETSC_COMM_SELF 3421 . bs - size of block 3422 . m - number of rows 3423 . n - number of columns 3424 . nz - number of nonzero blocks per block row (same for all rows) 3425 - nnz - array containing the number of nonzero blocks in the various block rows 3426 (possibly different for each block row) or PETSC_NULL 3427 3428 Output Parameter: 3429 . A - the matrix 3430 3431 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3432 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3433 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3434 3435 Options Database Keys: 3436 . -mat_no_unroll - uses code that does not unroll the loops in the 3437 block calculations (much slower) 3438 . -mat_block_size - size of the blocks to use 3439 3440 Level: intermediate 3441 3442 Notes: 3443 The number of rows and columns must be divisible by blocksize. 3444 3445 If the nnz parameter is given then the nz parameter is ignored 3446 3447 A nonzero block is any block that as 1 or more nonzeros in it 3448 3449 The block AIJ format is fully compatible with standard Fortran 77 3450 storage. That is, the stored row and column indices can begin at 3451 either one (as in Fortran) or zero. See the users' manual for details. 3452 3453 Specify the preallocated storage with either nz or nnz (not both). 3454 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3455 allocation. For additional details, see the users manual chapter on 3456 matrices. 3457 3458 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3459 @*/ 3460 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3461 { 3462 PetscErrorCode ierr; 3463 3464 PetscFunctionBegin; 3465 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3466 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3467 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3468 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3469 PetscFunctionReturn(0); 3470 } 3471 3472 #undef __FUNCT__ 3473 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3474 /*@C 3475 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3476 per row in the matrix. For good matrix assembly performance the 3477 user should preallocate the matrix storage by setting the parameter nz 3478 (or the array nnz). By setting these parameters accurately, performance 3479 during matrix assembly can be increased by more than a factor of 50. 3480 3481 Collective on MPI_Comm 3482 3483 Input Parameters: 3484 + A - the matrix 3485 . bs - size of block 3486 . nz - number of block nonzeros per block row (same for all rows) 3487 - nnz - array containing the number of block nonzeros in the various block rows 3488 (possibly different for each block row) or PETSC_NULL 3489 3490 Options Database Keys: 3491 . -mat_no_unroll - uses code that does not unroll the loops in the 3492 block calculations (much slower) 3493 . -mat_block_size - size of the blocks to use 3494 3495 Level: intermediate 3496 3497 Notes: 3498 If the nnz parameter is given then the nz parameter is ignored 3499 3500 You can call MatGetInfo() to get information on how effective the preallocation was; 3501 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3502 You can also run with the option -info and look for messages with the string 3503 malloc in them to see if additional memory allocation was needed. 3504 3505 The block AIJ format is fully compatible with standard Fortran 77 3506 storage. That is, the stored row and column indices can begin at 3507 either one (as in Fortran) or zero. See the users' manual for details. 3508 3509 Specify the preallocated storage with either nz or nnz (not both). 3510 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3511 allocation. For additional details, see the users manual chapter on 3512 matrices. 3513 3514 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3515 @*/ 3516 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3517 { 3518 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3519 3520 PetscFunctionBegin; 3521 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3522 if (f) { 3523 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3524 } 3525 PetscFunctionReturn(0); 3526 } 3527 3528 #undef __FUNCT__ 3529 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3530 /*@C 3531 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3532 (the default sequential PETSc format). 3533 3534 Collective on MPI_Comm 3535 3536 Input Parameters: 3537 + A - the matrix 3538 . i - the indices into j for the start of each local row (starts with zero) 3539 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3540 - v - optional values in the matrix 3541 3542 Level: developer 3543 3544 .keywords: matrix, aij, compressed row, sparse 3545 3546 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3547 @*/ 3548 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3549 { 3550 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3551 3552 PetscFunctionBegin; 3553 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3554 if (f) { 3555 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3556 } 3557 PetscFunctionReturn(0); 3558 } 3559 3560 3561 #undef __FUNCT__ 3562 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3563 /*@ 3564 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3565 (upper triangular entries in CSR format) provided by the user. 3566 3567 Collective on MPI_Comm 3568 3569 Input Parameters: 3570 + comm - must be an MPI communicator of size 1 3571 . bs - size of block 3572 . m - number of rows 3573 . n - number of columns 3574 . i - row indices 3575 . j - column indices 3576 - a - matrix values 3577 3578 Output Parameter: 3579 . mat - the matrix 3580 3581 Level: intermediate 3582 3583 Notes: 3584 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3585 once the matrix is destroyed 3586 3587 You cannot set new nonzero locations into this matrix, that will generate an error. 3588 3589 The i and j indices are 0 based 3590 3591 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3592 3593 @*/ 3594 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3595 { 3596 PetscErrorCode ierr; 3597 PetscInt ii; 3598 Mat_SeqBAIJ *baij; 3599 3600 PetscFunctionBegin; 3601 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3602 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3603 3604 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3605 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3606 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3607 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3608 baij = (Mat_SeqBAIJ*)(*mat)->data; 3609 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3610 3611 baij->i = i; 3612 baij->j = j; 3613 baij->a = a; 3614 baij->singlemalloc = PETSC_FALSE; 3615 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3616 baij->free_a = PETSC_FALSE; 3617 baij->free_ij = PETSC_FALSE; 3618 3619 for (ii=0; ii<m; ii++) { 3620 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3621 #if defined(PETSC_USE_DEBUG) 3622 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3623 #endif 3624 } 3625 #if defined(PETSC_USE_DEBUG) 3626 for (ii=0; ii<baij->i[m]; ii++) { 3627 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3628 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3629 } 3630 #endif 3631 3632 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3633 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3634 PetscFunctionReturn(0); 3635 } 3636