1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 default: 110 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 111 } 112 a->idiagvalid = PETSC_TRUE; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 120 { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 122 PetscScalar *x,x1,s1; 123 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 124 PetscErrorCode ierr; 125 PetscInt m = a->mbs,i,i2,nz,idx; 126 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 127 128 PetscFunctionBegin; 129 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 130 its = its*lits; 131 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 132 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 133 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 134 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 135 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 136 137 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 138 139 diag = a->diag; 140 idiag = a->idiag; 141 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 142 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 143 144 if (flag & SOR_ZERO_INITIAL_GUESS) { 145 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 146 x[0] = b[0]*idiag[0]; 147 i2 = 1; 148 idiag += 1; 149 for (i=1; i<m; i++) { 150 v = aa + ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; 154 while (nz--) { 155 idx = (*vi++); 156 x1 = x[idx]; 157 s1 -= v[0]*x1; 158 v += 1; 159 } 160 x[i2] = idiag[0]*s1; 161 idiag += 1; 162 i2 += 1; 163 } 164 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 165 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 166 } 167 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 168 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 169 i2 = 0; 170 mdiag = a->idiag+a->mbs; 171 for (i=0; i<m; i++) { 172 x1 = x[i2]; 173 x[i2] = mdiag[0]*x1; 174 mdiag += 1; 175 i2 += 1; 176 } 177 ierr = PetscLogFlops(m);CHKERRQ(ierr); 178 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 179 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 180 } 181 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 182 idiag = a->idiag+a->mbs - 1; 183 i2 = m - 1; 184 x1 = x[i2]; 185 x[i2] = idiag[0]*x1; 186 idiag -= 1; 187 i2 -= 1; 188 for (i=m-2; i>=0; i--) { 189 v = aa + (diag[i]+1); 190 vi = aj + diag[i] + 1; 191 nz = ai[i+1] - diag[i] - 1; 192 s1 = x[i2]; 193 while (nz--) { 194 idx = (*vi++); 195 x1 = x[idx]; 196 s1 -= v[0]*x1; 197 v += 1; 198 } 199 x[i2] = idiag[0]*s1; 200 idiag -= 2; 201 i2 -= 1; 202 } 203 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 204 } 205 } else { 206 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 207 } 208 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 209 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 215 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 PetscScalar *x,x1,x2,s1,s2; 219 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 220 PetscErrorCode ierr; 221 PetscInt m = a->mbs,i,i2,nz,idx; 222 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 223 224 PetscFunctionBegin; 225 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 226 its = its*lits; 227 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 228 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 229 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 230 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 231 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 232 233 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 234 235 diag = a->diag; 236 idiag = a->idiag; 237 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 238 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 239 240 if (flag & SOR_ZERO_INITIAL_GUESS) { 241 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 242 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 243 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 244 i2 = 2; 245 idiag += 4; 246 for (i=1; i<m; i++) { 247 v = aa + 4*ai[i]; 248 vi = aj + ai[i]; 249 nz = diag[i] - ai[i]; 250 s1 = b[i2]; s2 = b[i2+1]; 251 while (nz--) { 252 idx = 2*(*vi++); 253 x1 = x[idx]; x2 = x[1+idx]; 254 s1 -= v[0]*x1 + v[2]*x2; 255 s2 -= v[1]*x1 + v[3]*x2; 256 v += 4; 257 } 258 x[i2] = idiag[0]*s1 + idiag[2]*s2; 259 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 260 idiag += 4; 261 i2 += 2; 262 } 263 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 264 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 265 } 266 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 267 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 268 i2 = 0; 269 mdiag = a->idiag+4*a->mbs; 270 for (i=0; i<m; i++) { 271 x1 = x[i2]; x2 = x[i2+1]; 272 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 273 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 274 mdiag += 4; 275 i2 += 2; 276 } 277 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 278 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 279 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 280 } 281 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 282 idiag = a->idiag+4*a->mbs - 4; 283 i2 = 2*m - 2; 284 x1 = x[i2]; x2 = x[i2+1]; 285 x[i2] = idiag[0]*x1 + idiag[2]*x2; 286 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 287 idiag -= 4; 288 i2 -= 2; 289 for (i=m-2; i>=0; i--) { 290 v = aa + 4*(diag[i]+1); 291 vi = aj + diag[i] + 1; 292 nz = ai[i+1] - diag[i] - 1; 293 s1 = x[i2]; s2 = x[i2+1]; 294 while (nz--) { 295 idx = 2*(*vi++); 296 x1 = x[idx]; x2 = x[1+idx]; 297 s1 -= v[0]*x1 + v[2]*x2; 298 s2 -= v[1]*x1 + v[3]*x2; 299 v += 4; 300 } 301 x[i2] = idiag[0]*s1 + idiag[2]*s2; 302 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 303 idiag -= 4; 304 i2 -= 2; 305 } 306 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 310 } 311 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 312 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNCT__ 317 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 318 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 319 { 320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 321 PetscScalar *x,x1,x2,x3,s1,s2,s3; 322 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 323 PetscErrorCode ierr; 324 PetscInt m = a->mbs,i,i2,nz,idx; 325 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 326 327 PetscFunctionBegin; 328 its = its*lits; 329 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 330 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 331 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 332 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 333 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 334 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 335 336 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 337 338 diag = a->diag; 339 idiag = a->idiag; 340 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 341 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 342 343 if (flag & SOR_ZERO_INITIAL_GUESS) { 344 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 345 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 346 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 347 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 348 i2 = 3; 349 idiag += 9; 350 for (i=1; i<m; i++) { 351 v = aa + 9*ai[i]; 352 vi = aj + ai[i]; 353 nz = diag[i] - ai[i]; 354 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 355 while (nz--) { 356 idx = 3*(*vi++); 357 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 358 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 359 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 360 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 361 v += 9; 362 } 363 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 364 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 365 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 366 idiag += 9; 367 i2 += 3; 368 } 369 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 370 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 371 } 372 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 373 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 374 i2 = 0; 375 mdiag = a->idiag+9*a->mbs; 376 for (i=0; i<m; i++) { 377 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 378 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 379 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 380 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 381 mdiag += 9; 382 i2 += 3; 383 } 384 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 385 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 386 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 389 idiag = a->idiag+9*a->mbs - 9; 390 i2 = 3*m - 3; 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 392 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 393 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 394 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 395 idiag -= 9; 396 i2 -= 3; 397 for (i=m-2; i>=0; i--) { 398 v = aa + 9*(diag[i]+1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 402 while (nz--) { 403 idx = 3*(*vi++); 404 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 405 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 406 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 407 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 408 v += 9; 409 } 410 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 411 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 412 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 413 idiag -= 9; 414 i2 -= 3; 415 } 416 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 417 } 418 } else { 419 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 420 } 421 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 422 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 428 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 429 { 430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 431 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 432 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 433 PetscErrorCode ierr; 434 PetscInt m = a->mbs,i,i2,nz,idx; 435 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 436 437 PetscFunctionBegin; 438 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 439 its = its*lits; 440 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 441 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 442 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 443 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 444 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 445 446 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 447 448 diag = a->diag; 449 idiag = a->idiag; 450 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 451 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 452 453 if (flag & SOR_ZERO_INITIAL_GUESS) { 454 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 455 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 456 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 457 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 458 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 459 i2 = 4; 460 idiag += 16; 461 for (i=1; i<m; i++) { 462 v = aa + 16*ai[i]; 463 vi = aj + ai[i]; 464 nz = diag[i] - ai[i]; 465 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 466 while (nz--) { 467 idx = 4*(*vi++); 468 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 469 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 470 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 471 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 472 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 473 v += 16; 474 } 475 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 476 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 477 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 478 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 479 idiag += 16; 480 i2 += 4; 481 } 482 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 483 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 484 } 485 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 486 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 487 i2 = 0; 488 mdiag = a->idiag+16*a->mbs; 489 for (i=0; i<m; i++) { 490 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 491 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 492 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 493 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 494 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 495 mdiag += 16; 496 i2 += 4; 497 } 498 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 499 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 500 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 501 } 502 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 503 idiag = a->idiag+16*a->mbs - 16; 504 i2 = 4*m - 4; 505 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 506 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 507 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 508 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 509 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 510 idiag -= 16; 511 i2 -= 4; 512 for (i=m-2; i>=0; i--) { 513 v = aa + 16*(diag[i]+1); 514 vi = aj + diag[i] + 1; 515 nz = ai[i+1] - diag[i] - 1; 516 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 517 while (nz--) { 518 idx = 4*(*vi++); 519 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 520 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 521 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 522 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 523 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 524 v += 16; 525 } 526 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 527 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 528 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 529 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 530 idiag -= 16; 531 i2 -= 4; 532 } 533 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 534 } 535 } else { 536 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 537 } 538 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 539 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 540 PetscFunctionReturn(0); 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 545 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 546 { 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 549 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 550 PetscErrorCode ierr; 551 PetscInt m = a->mbs,i,i2,nz,idx; 552 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 553 554 PetscFunctionBegin; 555 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 556 its = its*lits; 557 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 558 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 559 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 560 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 561 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 562 563 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 564 565 diag = a->diag; 566 idiag = a->idiag; 567 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 568 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 569 570 if (flag & SOR_ZERO_INITIAL_GUESS) { 571 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 572 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 573 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 574 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 575 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 576 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 577 i2 = 5; 578 idiag += 25; 579 for (i=1; i<m; i++) { 580 v = aa + 25*ai[i]; 581 vi = aj + ai[i]; 582 nz = diag[i] - ai[i]; 583 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 584 while (nz--) { 585 idx = 5*(*vi++); 586 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 587 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 588 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 589 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 590 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 591 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 592 v += 25; 593 } 594 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 595 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 596 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 597 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 598 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 599 idiag += 25; 600 i2 += 5; 601 } 602 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 603 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 604 } 605 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 606 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 607 i2 = 0; 608 mdiag = a->idiag+25*a->mbs; 609 for (i=0; i<m; i++) { 610 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 611 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 612 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 613 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 614 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 615 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 616 mdiag += 25; 617 i2 += 5; 618 } 619 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 620 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 621 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 622 } 623 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 624 idiag = a->idiag+25*a->mbs - 25; 625 i2 = 5*m - 5; 626 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 627 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 628 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 629 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 630 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 631 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 632 idiag -= 25; 633 i2 -= 5; 634 for (i=m-2; i>=0; i--) { 635 v = aa + 25*(diag[i]+1); 636 vi = aj + diag[i] + 1; 637 nz = ai[i+1] - diag[i] - 1; 638 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 639 while (nz--) { 640 idx = 5*(*vi++); 641 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 642 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 643 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 644 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 645 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 646 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 647 v += 25; 648 } 649 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 650 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 651 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 652 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 653 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 654 idiag -= 25; 655 i2 -= 5; 656 } 657 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 658 } 659 } else { 660 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 661 } 662 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 669 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 670 { 671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 672 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 673 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 674 PetscErrorCode ierr; 675 PetscInt m = a->mbs,i,i2,nz,idx; 676 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 677 678 PetscFunctionBegin; 679 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 680 its = its*lits; 681 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 682 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 683 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 684 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 685 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 686 687 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 688 689 diag = a->diag; 690 idiag = a->idiag; 691 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 692 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 693 694 if (flag & SOR_ZERO_INITIAL_GUESS) { 695 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 696 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 697 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 698 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 699 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 700 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 701 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 702 i2 = 6; 703 idiag += 36; 704 for (i=1; i<m; i++) { 705 v = aa + 36*ai[i]; 706 vi = aj + ai[i]; 707 nz = diag[i] - ai[i]; 708 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 709 while (nz--) { 710 idx = 6*(*vi++); 711 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 712 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 713 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 714 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 715 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 716 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 717 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 718 v += 36; 719 } 720 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 721 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 722 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 723 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 724 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 725 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 726 idiag += 36; 727 i2 += 6; 728 } 729 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 730 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 731 } 732 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 733 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 734 i2 = 0; 735 mdiag = a->idiag+36*a->mbs; 736 for (i=0; i<m; i++) { 737 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 738 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 739 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 740 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 741 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 742 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 743 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 744 mdiag += 36; 745 i2 += 6; 746 } 747 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 748 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 749 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 750 } 751 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 752 idiag = a->idiag+36*a->mbs - 36; 753 i2 = 6*m - 6; 754 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 755 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 756 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 757 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 758 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 759 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 760 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 761 idiag -= 36; 762 i2 -= 6; 763 for (i=m-2; i>=0; i--) { 764 v = aa + 36*(diag[i]+1); 765 vi = aj + diag[i] + 1; 766 nz = ai[i+1] - diag[i] - 1; 767 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 768 while (nz--) { 769 idx = 6*(*vi++); 770 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 771 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 772 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 773 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 774 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 775 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 776 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 777 v += 36; 778 } 779 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 780 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 781 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 782 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 783 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 784 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 785 idiag -= 36; 786 i2 -= 6; 787 } 788 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 789 } 790 } else { 791 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 792 } 793 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 794 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 800 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 801 { 802 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 803 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 804 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 805 PetscErrorCode ierr; 806 PetscInt m = a->mbs,i,i2,nz,idx; 807 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 808 809 PetscFunctionBegin; 810 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 811 its = its*lits; 812 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 813 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 814 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 815 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 816 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 817 818 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 819 820 diag = a->diag; 821 idiag = a->idiag; 822 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 823 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 824 825 if (flag & SOR_ZERO_INITIAL_GUESS) { 826 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 827 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 828 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 829 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 830 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 831 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 832 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 833 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 834 i2 = 7; 835 idiag += 49; 836 for (i=1; i<m; i++) { 837 v = aa + 49*ai[i]; 838 vi = aj + ai[i]; 839 nz = diag[i] - ai[i]; 840 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 841 while (nz--) { 842 idx = 7*(*vi++); 843 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 844 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 845 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 846 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 847 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 848 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 849 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 850 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 851 v += 49; 852 } 853 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 854 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 855 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 856 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 857 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 858 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 859 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 860 idiag += 49; 861 i2 += 7; 862 } 863 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 864 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 865 } 866 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 867 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 868 i2 = 0; 869 mdiag = a->idiag+49*a->mbs; 870 for (i=0; i<m; i++) { 871 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 872 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 873 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 874 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 875 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 876 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 877 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 878 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 879 mdiag += 36; 880 i2 += 6; 881 } 882 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 883 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 884 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 885 } 886 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 887 idiag = a->idiag+49*a->mbs - 49; 888 i2 = 7*m - 7; 889 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 890 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 891 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 892 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 893 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 894 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 895 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 896 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 897 idiag -= 49; 898 i2 -= 7; 899 for (i=m-2; i>=0; i--) { 900 v = aa + 49*(diag[i]+1); 901 vi = aj + diag[i] + 1; 902 nz = ai[i+1] - diag[i] - 1; 903 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 904 while (nz--) { 905 idx = 7*(*vi++); 906 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 907 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 908 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 909 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 910 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 911 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 912 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 913 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 914 v += 49; 915 } 916 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 917 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 918 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 919 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 920 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 921 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 922 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 923 idiag -= 49; 924 i2 -= 7; 925 } 926 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 927 } 928 } else { 929 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 930 } 931 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 932 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 /* 937 Special version for direct calls from Fortran (Used in PETSc-fun3d) 938 */ 939 #if defined(PETSC_HAVE_FORTRAN_CAPS) 940 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 941 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 942 #define matsetvaluesblocked4_ matsetvaluesblocked4 943 #endif 944 945 EXTERN_C_BEGIN 946 #undef __FUNCT__ 947 #define __FUNCT__ "matsetvaluesblocked4_" 948 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 949 { 950 Mat A = *AA; 951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 952 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 953 PetscInt *ai=a->i,*ailen=a->ilen; 954 PetscInt *aj=a->j,stepval,lastcol = -1; 955 const PetscScalar *value = v; 956 MatScalar *ap,*aa = a->a,*bap; 957 958 PetscFunctionBegin; 959 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 960 stepval = (n-1)*4; 961 for (k=0; k<m; k++) { /* loop over added rows */ 962 row = im[k]; 963 rp = aj + ai[row]; 964 ap = aa + 16*ai[row]; 965 nrow = ailen[row]; 966 low = 0; 967 high = nrow; 968 for (l=0; l<n; l++) { /* loop over added columns */ 969 col = in[l]; 970 if (col <= lastcol) low = 0; else high = nrow; 971 lastcol = col; 972 value = v + k*(stepval+4 + l)*4; 973 while (high-low > 7) { 974 t = (low+high)/2; 975 if (rp[t] > col) high = t; 976 else low = t; 977 } 978 for (i=low; i<high; i++) { 979 if (rp[i] > col) break; 980 if (rp[i] == col) { 981 bap = ap + 16*i; 982 for (ii=0; ii<4; ii++,value+=stepval) { 983 for (jj=ii; jj<16; jj+=4) { 984 bap[jj] += *value++; 985 } 986 } 987 goto noinsert2; 988 } 989 } 990 N = nrow++ - 1; 991 high++; /* added new column index thus must search to one higher than before */ 992 /* shift up all the later entries in this row */ 993 for (ii=N; ii>=i; ii--) { 994 rp[ii+1] = rp[ii]; 995 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 996 } 997 if (N >= i) { 998 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 999 } 1000 rp[i] = col; 1001 bap = ap + 16*i; 1002 for (ii=0; ii<4; ii++,value+=stepval) { 1003 for (jj=ii; jj<16; jj+=4) { 1004 bap[jj] = *value++; 1005 } 1006 } 1007 noinsert2:; 1008 low = i; 1009 } 1010 ailen[row] = nrow; 1011 } 1012 PetscFunctionReturnVoid(); 1013 } 1014 EXTERN_C_END 1015 1016 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1017 #define matsetvalues4_ MATSETVALUES4 1018 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1019 #define matsetvalues4_ matsetvalues4 1020 #endif 1021 1022 EXTERN_C_BEGIN 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatSetValues4_" 1025 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1026 { 1027 Mat A = *AA; 1028 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1029 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1030 PetscInt *ai=a->i,*ailen=a->ilen; 1031 PetscInt *aj=a->j,brow,bcol; 1032 PetscInt ridx,cidx,lastcol = -1; 1033 MatScalar *ap,value,*aa=a->a,*bap; 1034 1035 PetscFunctionBegin; 1036 for (k=0; k<m; k++) { /* loop over added rows */ 1037 row = im[k]; brow = row/4; 1038 rp = aj + ai[brow]; 1039 ap = aa + 16*ai[brow]; 1040 nrow = ailen[brow]; 1041 low = 0; 1042 high = nrow; 1043 for (l=0; l<n; l++) { /* loop over added columns */ 1044 col = in[l]; bcol = col/4; 1045 ridx = row % 4; cidx = col % 4; 1046 value = v[l + k*n]; 1047 if (col <= lastcol) low = 0; else high = nrow; 1048 lastcol = col; 1049 while (high-low > 7) { 1050 t = (low+high)/2; 1051 if (rp[t] > bcol) high = t; 1052 else low = t; 1053 } 1054 for (i=low; i<high; i++) { 1055 if (rp[i] > bcol) break; 1056 if (rp[i] == bcol) { 1057 bap = ap + 16*i + 4*cidx + ridx; 1058 *bap += value; 1059 goto noinsert1; 1060 } 1061 } 1062 N = nrow++ - 1; 1063 high++; /* added new column thus must search to one higher than before */ 1064 /* shift up all the later entries in this row */ 1065 for (ii=N; ii>=i; ii--) { 1066 rp[ii+1] = rp[ii]; 1067 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1068 } 1069 if (N>=i) { 1070 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1071 } 1072 rp[i] = bcol; 1073 ap[16*i + 4*cidx + ridx] = value; 1074 noinsert1:; 1075 low = i; 1076 } 1077 ailen[brow] = nrow; 1078 } 1079 PetscFunctionReturnVoid(); 1080 } 1081 EXTERN_C_END 1082 1083 /* 1084 Checks for missing diagonals 1085 */ 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1088 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1089 { 1090 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscInt *diag,*jj = a->j,i; 1093 1094 PetscFunctionBegin; 1095 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1096 *missing = PETSC_FALSE; 1097 diag = a->diag; 1098 for (i=0; i<a->mbs; i++) { 1099 if (jj[diag[i]] != i) { 1100 *missing = PETSC_TRUE; 1101 if (d) *d = i; 1102 } 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1109 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1110 { 1111 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 PetscInt i,j,m = a->mbs; 1114 1115 PetscFunctionBegin; 1116 if (!a->diag) { 1117 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1118 } 1119 for (i=0; i<m; i++) { 1120 a->diag[i] = a->i[i+1]; 1121 for (j=a->i[i]; j<a->i[i+1]; j++) { 1122 if (a->j[j] == i) { 1123 a->diag[i] = j; 1124 break; 1125 } 1126 } 1127 } 1128 PetscFunctionReturn(0); 1129 } 1130 1131 1132 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1136 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1137 { 1138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1139 PetscErrorCode ierr; 1140 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 1141 PetscInt *tia, *tja; 1142 1143 PetscFunctionBegin; 1144 *nn = n; 1145 if (!ia) PetscFunctionReturn(0); 1146 if (symmetric) { 1147 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1148 } else { 1149 tia = a->i; tja = a->j; 1150 } 1151 1152 if (!blockcompressed && bs > 1) { 1153 (*nn) *= bs; 1154 nbs = bs; 1155 /* malloc & create the natural set of indices */ 1156 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1157 if (n) { 1158 (*ia)[0] = 0; 1159 for (j=1; j<bs; j++) { 1160 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1161 } 1162 } 1163 1164 for (i=1; i<n; i++) { 1165 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1166 for (j=1; j<bs; j++) { 1167 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1168 } 1169 } 1170 if (n) { 1171 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1172 } 1173 1174 if (ja) { 1175 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1176 cnt = 0; 1177 for (i=0; i<n; i++) { 1178 for (j=0; j<bs; j++) { 1179 for (k=tia[i]; k<tia[i+1]; k++) { 1180 for (l=0; l<bs; l++) { 1181 (*ja)[cnt++] = bs*tja[k] + l; 1182 } 1183 } 1184 } 1185 } 1186 } 1187 1188 n *= bs; 1189 nz *= bs*bs; 1190 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1191 ierr = PetscFree(tia);CHKERRQ(ierr); 1192 ierr = PetscFree(tja);CHKERRQ(ierr); 1193 } 1194 } else { 1195 *ia = tia; 1196 if (ja) *ja = tja; 1197 } 1198 if (oshift == 1) { 1199 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1200 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1201 } 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1207 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1208 { 1209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1210 PetscErrorCode ierr; 1211 PetscInt i,n = a->mbs,nz = a->i[n]; 1212 1213 PetscFunctionBegin; 1214 if (!ia) PetscFunctionReturn(0); 1215 if (!blockcompressed && A->rmap.bs > 1) { 1216 ierr = PetscFree(*ia);CHKERRQ(ierr); 1217 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1218 } else if (symmetric) { 1219 ierr = PetscFree(*ia);CHKERRQ(ierr); 1220 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1221 } else if (oshift == 1) { /* blockcompressed */ 1222 for (i=0; i<n+1; i++) a->i[i]--; 1223 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1230 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1231 { 1232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 #if defined(PETSC_USE_LOG) 1237 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 1238 #endif 1239 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1240 if (a->row) { 1241 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1242 } 1243 if (a->col) { 1244 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1245 } 1246 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1247 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1248 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1249 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1250 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1251 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1252 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1253 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1254 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1255 1256 ierr = PetscFree(a);CHKERRQ(ierr); 1257 1258 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1259 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1260 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1261 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1262 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1263 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1264 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1265 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1271 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1272 { 1273 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 switch (op) { 1278 case MAT_ROW_ORIENTED: 1279 a->roworiented = flg; 1280 break; 1281 case MAT_KEEP_ZEROED_ROWS: 1282 a->keepzeroedrows = flg; 1283 break; 1284 case MAT_NEW_NONZERO_LOCATIONS: 1285 a->nonew = (flg ? 0 : 1); 1286 break; 1287 case MAT_NEW_NONZERO_LOCATION_ERR: 1288 a->nonew = (flg ? -1 : 0); 1289 break; 1290 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1291 a->nonew = (flg ? -2 : 0); 1292 break; 1293 case MAT_NEW_DIAGONALS: 1294 case MAT_IGNORE_OFF_PROC_ENTRIES: 1295 case MAT_USE_HASH_TABLE: 1296 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1297 break; 1298 case MAT_SYMMETRIC: 1299 case MAT_STRUCTURALLY_SYMMETRIC: 1300 case MAT_HERMITIAN: 1301 case MAT_SYMMETRY_ETERNAL: 1302 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1303 break; 1304 default: 1305 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1312 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscErrorCode ierr; 1316 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1317 MatScalar *aa,*aa_i; 1318 PetscScalar *v_i; 1319 1320 PetscFunctionBegin; 1321 bs = A->rmap.bs; 1322 ai = a->i; 1323 aj = a->j; 1324 aa = a->a; 1325 bs2 = a->bs2; 1326 1327 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1328 1329 bn = row/bs; /* Block number */ 1330 bp = row % bs; /* Block Position */ 1331 M = ai[bn+1] - ai[bn]; 1332 *nz = bs*M; 1333 1334 if (v) { 1335 *v = 0; 1336 if (*nz) { 1337 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1338 for (i=0; i<M; i++) { /* for each block in the block row */ 1339 v_i = *v + i*bs; 1340 aa_i = aa + bs2*(ai[bn] + i); 1341 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1342 } 1343 } 1344 } 1345 1346 if (idx) { 1347 *idx = 0; 1348 if (*nz) { 1349 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1350 for (i=0; i<M; i++) { /* for each block in the block row */ 1351 idx_i = *idx + i*bs; 1352 itmp = bs*aj[ai[bn] + i]; 1353 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1354 } 1355 } 1356 } 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1362 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1363 { 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1368 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1374 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1375 { 1376 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1377 Mat C; 1378 PetscErrorCode ierr; 1379 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1380 PetscInt *rows,*cols,bs2=a->bs2; 1381 PetscScalar *array; 1382 1383 PetscFunctionBegin; 1384 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1385 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1386 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1387 1388 array = a->a; 1389 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1390 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1391 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1392 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1393 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1394 ierr = PetscFree(col);CHKERRQ(ierr); 1395 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1396 cols = rows + bs; 1397 for (i=0; i<mbs; i++) { 1398 cols[0] = i*bs; 1399 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1400 len = ai[i+1] - ai[i]; 1401 for (j=0; j<len; j++) { 1402 rows[0] = (*aj++)*bs; 1403 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1404 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1405 array += bs2; 1406 } 1407 } 1408 ierr = PetscFree(rows);CHKERRQ(ierr); 1409 1410 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 1413 if (B) { 1414 *B = C; 1415 } else { 1416 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1423 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1424 { 1425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1426 PetscErrorCode ierr; 1427 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1428 int fd; 1429 PetscScalar *aa; 1430 FILE *file; 1431 1432 PetscFunctionBegin; 1433 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1434 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1435 col_lens[0] = MAT_FILE_COOKIE; 1436 1437 col_lens[1] = A->rmap.N; 1438 col_lens[2] = A->cmap.n; 1439 col_lens[3] = a->nz*bs2; 1440 1441 /* store lengths of each row and write (including header) to file */ 1442 count = 0; 1443 for (i=0; i<a->mbs; i++) { 1444 for (j=0; j<bs; j++) { 1445 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1446 } 1447 } 1448 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1449 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1450 1451 /* store column indices (zero start index) */ 1452 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1453 count = 0; 1454 for (i=0; i<a->mbs; i++) { 1455 for (j=0; j<bs; j++) { 1456 for (k=a->i[i]; k<a->i[i+1]; k++) { 1457 for (l=0; l<bs; l++) { 1458 jj[count++] = bs*a->j[k] + l; 1459 } 1460 } 1461 } 1462 } 1463 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1464 ierr = PetscFree(jj);CHKERRQ(ierr); 1465 1466 /* store nonzero values */ 1467 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1468 count = 0; 1469 for (i=0; i<a->mbs; i++) { 1470 for (j=0; j<bs; j++) { 1471 for (k=a->i[i]; k<a->i[i+1]; k++) { 1472 for (l=0; l<bs; l++) { 1473 aa[count++] = a->a[bs2*k + l*bs + j]; 1474 } 1475 } 1476 } 1477 } 1478 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1479 ierr = PetscFree(aa);CHKERRQ(ierr); 1480 1481 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1482 if (file) { 1483 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1490 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1491 { 1492 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1493 PetscErrorCode ierr; 1494 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1495 PetscViewerFormat format; 1496 1497 PetscFunctionBegin; 1498 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1499 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1500 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1501 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1502 Mat aij; 1503 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1504 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1505 ierr = MatDestroy(aij);CHKERRQ(ierr); 1506 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1509 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1510 for (i=0; i<a->mbs; i++) { 1511 for (j=0; j<bs; j++) { 1512 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1513 for (k=a->i[i]; k<a->i[i+1]; k++) { 1514 for (l=0; l<bs; l++) { 1515 #if defined(PETSC_USE_COMPLEX) 1516 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1517 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1518 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1519 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1520 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1521 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1522 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1523 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1524 } 1525 #else 1526 if (a->a[bs2*k + l*bs + j] != 0.0) { 1527 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1528 } 1529 #endif 1530 } 1531 } 1532 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1533 } 1534 } 1535 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1536 } else { 1537 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1538 for (i=0; i<a->mbs; i++) { 1539 for (j=0; j<bs; j++) { 1540 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1541 for (k=a->i[i]; k<a->i[i+1]; k++) { 1542 for (l=0; l<bs; l++) { 1543 #if defined(PETSC_USE_COMPLEX) 1544 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1546 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1547 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1549 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1550 } else { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1552 } 1553 #else 1554 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1555 #endif 1556 } 1557 } 1558 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1559 } 1560 } 1561 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1562 } 1563 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1564 PetscFunctionReturn(0); 1565 } 1566 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1569 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1570 { 1571 Mat A = (Mat) Aa; 1572 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1573 PetscErrorCode ierr; 1574 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1575 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1576 MatScalar *aa; 1577 PetscViewer viewer; 1578 1579 PetscFunctionBegin; 1580 1581 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1582 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1583 1584 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1585 1586 /* loop over matrix elements drawing boxes */ 1587 color = PETSC_DRAW_BLUE; 1588 for (i=0,row=0; i<mbs; i++,row+=bs) { 1589 for (j=a->i[i]; j<a->i[i+1]; j++) { 1590 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1591 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1592 aa = a->a + j*bs2; 1593 for (k=0; k<bs; k++) { 1594 for (l=0; l<bs; l++) { 1595 if (PetscRealPart(*aa++) >= 0.) continue; 1596 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1597 } 1598 } 1599 } 1600 } 1601 color = PETSC_DRAW_CYAN; 1602 for (i=0,row=0; i<mbs; i++,row+=bs) { 1603 for (j=a->i[i]; j<a->i[i+1]; j++) { 1604 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1605 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1606 aa = a->a + j*bs2; 1607 for (k=0; k<bs; k++) { 1608 for (l=0; l<bs; l++) { 1609 if (PetscRealPart(*aa++) != 0.) continue; 1610 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1611 } 1612 } 1613 } 1614 } 1615 1616 color = PETSC_DRAW_RED; 1617 for (i=0,row=0; i<mbs; i++,row+=bs) { 1618 for (j=a->i[i]; j<a->i[i+1]; j++) { 1619 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1620 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1621 aa = a->a + j*bs2; 1622 for (k=0; k<bs; k++) { 1623 for (l=0; l<bs; l++) { 1624 if (PetscRealPart(*aa++) <= 0.) continue; 1625 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1626 } 1627 } 1628 } 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1635 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1636 { 1637 PetscErrorCode ierr; 1638 PetscReal xl,yl,xr,yr,w,h; 1639 PetscDraw draw; 1640 PetscTruth isnull; 1641 1642 PetscFunctionBegin; 1643 1644 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1645 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1646 1647 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1648 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1649 xr += w; yr += h; xl = -w; yl = -h; 1650 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1651 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1652 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatView_SeqBAIJ" 1658 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1659 { 1660 PetscErrorCode ierr; 1661 PetscTruth iascii,isbinary,isdraw; 1662 1663 PetscFunctionBegin; 1664 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1665 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1666 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1667 if (iascii){ 1668 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1669 } else if (isbinary) { 1670 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1671 } else if (isdraw) { 1672 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1673 } else { 1674 Mat B; 1675 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1676 ierr = MatView(B,viewer);CHKERRQ(ierr); 1677 ierr = MatDestroy(B);CHKERRQ(ierr); 1678 } 1679 PetscFunctionReturn(0); 1680 } 1681 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1685 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1686 { 1687 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1688 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1689 PetscInt *ai = a->i,*ailen = a->ilen; 1690 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1691 MatScalar *ap,*aa = a->a; 1692 1693 PetscFunctionBegin; 1694 for (k=0; k<m; k++) { /* loop over rows */ 1695 row = im[k]; brow = row/bs; 1696 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1697 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1698 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1699 nrow = ailen[brow]; 1700 for (l=0; l<n; l++) { /* loop over columns */ 1701 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1702 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1703 col = in[l] ; 1704 bcol = col/bs; 1705 cidx = col%bs; 1706 ridx = row%bs; 1707 high = nrow; 1708 low = 0; /* assume unsorted */ 1709 while (high-low > 5) { 1710 t = (low+high)/2; 1711 if (rp[t] > bcol) high = t; 1712 else low = t; 1713 } 1714 for (i=low; i<high; i++) { 1715 if (rp[i] > bcol) break; 1716 if (rp[i] == bcol) { 1717 *v++ = ap[bs2*i+bs*cidx+ridx]; 1718 goto finished; 1719 } 1720 } 1721 *v++ = 0.0; 1722 finished:; 1723 } 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #define CHUNKSIZE 10 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1731 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1732 { 1733 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1734 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1735 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1736 PetscErrorCode ierr; 1737 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1738 PetscTruth roworiented=a->roworiented; 1739 const MatScalar *value = v; 1740 MatScalar *ap,*aa = a->a,*bap; 1741 1742 PetscFunctionBegin; 1743 if (roworiented) { 1744 stepval = (n-1)*bs; 1745 } else { 1746 stepval = (m-1)*bs; 1747 } 1748 for (k=0; k<m; k++) { /* loop over added rows */ 1749 row = im[k]; 1750 if (row < 0) continue; 1751 #if defined(PETSC_USE_DEBUG) 1752 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1753 #endif 1754 rp = aj + ai[row]; 1755 ap = aa + bs2*ai[row]; 1756 rmax = imax[row]; 1757 nrow = ailen[row]; 1758 low = 0; 1759 high = nrow; 1760 for (l=0; l<n; l++) { /* loop over added columns */ 1761 if (in[l] < 0) continue; 1762 #if defined(PETSC_USE_DEBUG) 1763 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1764 #endif 1765 col = in[l]; 1766 if (roworiented) { 1767 value = v + k*(stepval+bs)*bs + l*bs; 1768 } else { 1769 value = v + l*(stepval+bs)*bs + k*bs; 1770 } 1771 if (col <= lastcol) low = 0; else high = nrow; 1772 lastcol = col; 1773 while (high-low > 7) { 1774 t = (low+high)/2; 1775 if (rp[t] > col) high = t; 1776 else low = t; 1777 } 1778 for (i=low; i<high; i++) { 1779 if (rp[i] > col) break; 1780 if (rp[i] == col) { 1781 bap = ap + bs2*i; 1782 if (roworiented) { 1783 if (is == ADD_VALUES) { 1784 for (ii=0; ii<bs; ii++,value+=stepval) { 1785 for (jj=ii; jj<bs2; jj+=bs) { 1786 bap[jj] += *value++; 1787 } 1788 } 1789 } else { 1790 for (ii=0; ii<bs; ii++,value+=stepval) { 1791 for (jj=ii; jj<bs2; jj+=bs) { 1792 bap[jj] = *value++; 1793 } 1794 } 1795 } 1796 } else { 1797 if (is == ADD_VALUES) { 1798 for (ii=0; ii<bs; ii++,value+=stepval) { 1799 for (jj=0; jj<bs; jj++) { 1800 *bap++ += *value++; 1801 } 1802 } 1803 } else { 1804 for (ii=0; ii<bs; ii++,value+=stepval) { 1805 for (jj=0; jj<bs; jj++) { 1806 *bap++ = *value++; 1807 } 1808 } 1809 } 1810 } 1811 goto noinsert2; 1812 } 1813 } 1814 if (nonew == 1) goto noinsert2; 1815 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1816 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1817 N = nrow++ - 1; high++; 1818 /* shift up all the later entries in this row */ 1819 for (ii=N; ii>=i; ii--) { 1820 rp[ii+1] = rp[ii]; 1821 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1822 } 1823 if (N >= i) { 1824 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1825 } 1826 rp[i] = col; 1827 bap = ap + bs2*i; 1828 if (roworiented) { 1829 for (ii=0; ii<bs; ii++,value+=stepval) { 1830 for (jj=ii; jj<bs2; jj+=bs) { 1831 bap[jj] = *value++; 1832 } 1833 } 1834 } else { 1835 for (ii=0; ii<bs; ii++,value+=stepval) { 1836 for (jj=0; jj<bs; jj++) { 1837 *bap++ = *value++; 1838 } 1839 } 1840 } 1841 noinsert2:; 1842 low = i; 1843 } 1844 ailen[row] = nrow; 1845 } 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1851 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1852 { 1853 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1854 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1855 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1856 PetscErrorCode ierr; 1857 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1858 MatScalar *aa = a->a,*ap; 1859 PetscReal ratio=0.6; 1860 1861 PetscFunctionBegin; 1862 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1863 1864 if (m) rmax = ailen[0]; 1865 for (i=1; i<mbs; i++) { 1866 /* move each row back by the amount of empty slots (fshift) before it*/ 1867 fshift += imax[i-1] - ailen[i-1]; 1868 rmax = PetscMax(rmax,ailen[i]); 1869 if (fshift) { 1870 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1871 N = ailen[i]; 1872 for (j=0; j<N; j++) { 1873 ip[j-fshift] = ip[j]; 1874 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1875 } 1876 } 1877 ai[i] = ai[i-1] + ailen[i-1]; 1878 } 1879 if (mbs) { 1880 fshift += imax[mbs-1] - ailen[mbs-1]; 1881 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1882 } 1883 /* reset ilen and imax for each row */ 1884 for (i=0; i<mbs; i++) { 1885 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1886 } 1887 a->nz = ai[mbs]; 1888 1889 /* diagonals may have moved, so kill the diagonal pointers */ 1890 a->idiagvalid = PETSC_FALSE; 1891 if (fshift && a->diag) { 1892 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1893 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1894 a->diag = 0; 1895 } 1896 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1897 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1898 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1899 a->reallocs = 0; 1900 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1901 1902 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1903 if (a->compressedrow.use){ 1904 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1905 } 1906 1907 A->same_nonzero = PETSC_TRUE; 1908 PetscFunctionReturn(0); 1909 } 1910 1911 /* 1912 This function returns an array of flags which indicate the locations of contiguous 1913 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1914 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1915 Assume: sizes should be long enough to hold all the values. 1916 */ 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1919 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1920 { 1921 PetscInt i,j,k,row; 1922 PetscTruth flg; 1923 1924 PetscFunctionBegin; 1925 for (i=0,j=0; i<n; j++) { 1926 row = idx[i]; 1927 if (row%bs!=0) { /* Not the begining of a block */ 1928 sizes[j] = 1; 1929 i++; 1930 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1931 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1932 i++; 1933 } else { /* Begining of the block, so check if the complete block exists */ 1934 flg = PETSC_TRUE; 1935 for (k=1; k<bs; k++) { 1936 if (row+k != idx[i+k]) { /* break in the block */ 1937 flg = PETSC_FALSE; 1938 break; 1939 } 1940 } 1941 if (flg) { /* No break in the bs */ 1942 sizes[j] = bs; 1943 i+= bs; 1944 } else { 1945 sizes[j] = 1; 1946 i++; 1947 } 1948 } 1949 } 1950 *bs_max = j; 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1956 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1957 { 1958 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1959 PetscErrorCode ierr; 1960 PetscInt i,j,k,count,*rows; 1961 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1962 PetscScalar zero = 0.0; 1963 MatScalar *aa; 1964 1965 PetscFunctionBegin; 1966 /* Make a copy of the IS and sort it */ 1967 /* allocate memory for rows,sizes */ 1968 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1969 sizes = rows + is_n; 1970 1971 /* copy IS values to rows, and sort them */ 1972 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1973 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1974 if (baij->keepzeroedrows) { 1975 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1976 bs_max = is_n; 1977 A->same_nonzero = PETSC_TRUE; 1978 } else { 1979 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1980 A->same_nonzero = PETSC_FALSE; 1981 } 1982 1983 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1984 row = rows[j]; 1985 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1986 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1987 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1988 if (sizes[i] == bs && !baij->keepzeroedrows) { 1989 if (diag != 0.0) { 1990 if (baij->ilen[row/bs] > 0) { 1991 baij->ilen[row/bs] = 1; 1992 baij->j[baij->i[row/bs]] = row/bs; 1993 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1994 } 1995 /* Now insert all the diagonal values for this bs */ 1996 for (k=0; k<bs; k++) { 1997 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1998 } 1999 } else { /* (diag == 0.0) */ 2000 baij->ilen[row/bs] = 0; 2001 } /* end (diag == 0.0) */ 2002 } else { /* (sizes[i] != bs) */ 2003 #if defined (PETSC_USE_DEBUG) 2004 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2005 #endif 2006 for (k=0; k<count; k++) { 2007 aa[0] = zero; 2008 aa += bs; 2009 } 2010 if (diag != 0.0) { 2011 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2012 } 2013 } 2014 } 2015 2016 ierr = PetscFree(rows);CHKERRQ(ierr); 2017 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2023 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2024 { 2025 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2026 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2027 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2028 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 2029 PetscErrorCode ierr; 2030 PetscInt ridx,cidx,bs2=a->bs2; 2031 PetscTruth roworiented=a->roworiented; 2032 MatScalar *ap,value,*aa=a->a,*bap; 2033 2034 PetscFunctionBegin; 2035 for (k=0; k<m; k++) { /* loop over added rows */ 2036 row = im[k]; 2037 brow = row/bs; 2038 if (row < 0) continue; 2039 #if defined(PETSC_USE_DEBUG) 2040 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 2041 #endif 2042 rp = aj + ai[brow]; 2043 ap = aa + bs2*ai[brow]; 2044 rmax = imax[brow]; 2045 nrow = ailen[brow]; 2046 low = 0; 2047 high = nrow; 2048 for (l=0; l<n; l++) { /* loop over added columns */ 2049 if (in[l] < 0) continue; 2050 #if defined(PETSC_USE_DEBUG) 2051 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 2052 #endif 2053 col = in[l]; bcol = col/bs; 2054 ridx = row % bs; cidx = col % bs; 2055 if (roworiented) { 2056 value = v[l + k*n]; 2057 } else { 2058 value = v[k + l*m]; 2059 } 2060 if (col <= lastcol) low = 0; else high = nrow; 2061 lastcol = col; 2062 while (high-low > 7) { 2063 t = (low+high)/2; 2064 if (rp[t] > bcol) high = t; 2065 else low = t; 2066 } 2067 for (i=low; i<high; i++) { 2068 if (rp[i] > bcol) break; 2069 if (rp[i] == bcol) { 2070 bap = ap + bs2*i + bs*cidx + ridx; 2071 if (is == ADD_VALUES) *bap += value; 2072 else *bap = value; 2073 goto noinsert1; 2074 } 2075 } 2076 if (nonew == 1) goto noinsert1; 2077 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2078 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2079 N = nrow++ - 1; high++; 2080 /* shift up all the later entries in this row */ 2081 for (ii=N; ii>=i; ii--) { 2082 rp[ii+1] = rp[ii]; 2083 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2084 } 2085 if (N>=i) { 2086 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2087 } 2088 rp[i] = bcol; 2089 ap[bs2*i + bs*cidx + ridx] = value; 2090 a->nz++; 2091 noinsert1:; 2092 low = i; 2093 } 2094 ailen[brow] = nrow; 2095 } 2096 A->same_nonzero = PETSC_FALSE; 2097 PetscFunctionReturn(0); 2098 } 2099 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2103 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 2104 { 2105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2106 Mat outA; 2107 PetscErrorCode ierr; 2108 PetscTruth row_identity,col_identity; 2109 2110 PetscFunctionBegin; 2111 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2112 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2113 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2114 if (!row_identity || !col_identity) { 2115 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2116 } 2117 2118 outA = inA; 2119 inA->factor = FACTOR_LU; 2120 2121 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2122 2123 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2124 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2125 a->row = row; 2126 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2127 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2128 a->col = col; 2129 2130 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2131 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2132 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2133 2134 /* 2135 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 2136 for ILU(0) factorization with natural ordering 2137 */ 2138 if (inA->rmap.bs < 8) { 2139 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 2140 } else { 2141 if (!a->solve_work) { 2142 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2143 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2144 } 2145 } 2146 2147 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 2148 2149 PetscFunctionReturn(0); 2150 } 2151 2152 EXTERN_C_BEGIN 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2155 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2156 { 2157 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2158 PetscInt i,nz,nbs; 2159 2160 PetscFunctionBegin; 2161 nz = baij->maxnz/baij->bs2; 2162 nbs = baij->nbs; 2163 for (i=0; i<nz; i++) { 2164 baij->j[i] = indices[i]; 2165 } 2166 baij->nz = nz; 2167 for (i=0; i<nbs; i++) { 2168 baij->ilen[i] = baij->imax[i]; 2169 } 2170 2171 PetscFunctionReturn(0); 2172 } 2173 EXTERN_C_END 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2177 /*@ 2178 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2179 in the matrix. 2180 2181 Input Parameters: 2182 + mat - the SeqBAIJ matrix 2183 - indices - the column indices 2184 2185 Level: advanced 2186 2187 Notes: 2188 This can be called if you have precomputed the nonzero structure of the 2189 matrix and want to provide it to the matrix object to improve the performance 2190 of the MatSetValues() operation. 2191 2192 You MUST have set the correct numbers of nonzeros per row in the call to 2193 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2194 2195 MUST be called before any calls to MatSetValues(); 2196 2197 @*/ 2198 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2199 { 2200 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2201 2202 PetscFunctionBegin; 2203 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2204 PetscValidPointer(indices,2); 2205 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2206 if (f) { 2207 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2208 } else { 2209 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2210 } 2211 PetscFunctionReturn(0); 2212 } 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2216 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2217 { 2218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2219 PetscErrorCode ierr; 2220 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2221 PetscReal atmp; 2222 PetscScalar *x,zero = 0.0; 2223 MatScalar *aa; 2224 PetscInt ncols,brow,krow,kcol; 2225 2226 PetscFunctionBegin; 2227 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2228 bs = A->rmap.bs; 2229 aa = a->a; 2230 ai = a->i; 2231 aj = a->j; 2232 mbs = a->mbs; 2233 2234 ierr = VecSet(v,zero);CHKERRQ(ierr); 2235 if (idx) { 2236 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 2237 } 2238 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2239 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2240 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2241 for (i=0; i<mbs; i++) { 2242 ncols = ai[1] - ai[0]; ai++; 2243 brow = bs*i; 2244 for (j=0; j<ncols; j++){ 2245 for (kcol=0; kcol<bs; kcol++){ 2246 for (krow=0; krow<bs; krow++){ 2247 atmp = PetscAbsScalar(*aa);aa++; 2248 row = brow + krow; /* row index */ 2249 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2250 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2251 } 2252 } 2253 aj++; 2254 } 2255 } 2256 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatCopy_SeqBAIJ" 2262 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2263 { 2264 PetscErrorCode ierr; 2265 2266 PetscFunctionBegin; 2267 /* If the two matrices have the same copy implementation, use fast copy. */ 2268 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2270 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2271 2272 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 2273 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2274 } 2275 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 2276 } else { 2277 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2278 } 2279 PetscFunctionReturn(0); 2280 } 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2284 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2285 { 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2295 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2296 { 2297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2298 PetscFunctionBegin; 2299 *array = a->a; 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2305 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2306 { 2307 PetscFunctionBegin; 2308 PetscFunctionReturn(0); 2309 } 2310 2311 #include "petscblaslapack.h" 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2314 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2315 { 2316 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2317 PetscErrorCode ierr; 2318 PetscInt i,bs=Y->rmap.bs,j,bs2; 2319 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2320 2321 PetscFunctionBegin; 2322 if (str == SAME_NONZERO_PATTERN) { 2323 PetscScalar alpha = a; 2324 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2325 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2326 if (y->xtoy && y->XtoY != X) { 2327 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2328 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2329 } 2330 if (!y->xtoy) { /* get xtoy */ 2331 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2332 y->XtoY = X; 2333 } 2334 bs2 = bs*bs; 2335 for (i=0; i<x->nz; i++) { 2336 j = 0; 2337 while (j < bs2){ 2338 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2339 j++; 2340 } 2341 } 2342 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2343 } else { 2344 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2345 } 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2351 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2352 { 2353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2354 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2355 PetscScalar *aa = a->a; 2356 2357 PetscFunctionBegin; 2358 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2359 PetscFunctionReturn(0); 2360 } 2361 2362 #undef __FUNCT__ 2363 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2364 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2365 { 2366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2367 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2368 PetscScalar *aa = a->a; 2369 2370 PetscFunctionBegin; 2371 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2372 PetscFunctionReturn(0); 2373 } 2374 2375 2376 /* -------------------------------------------------------------------*/ 2377 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2378 MatGetRow_SeqBAIJ, 2379 MatRestoreRow_SeqBAIJ, 2380 MatMult_SeqBAIJ_N, 2381 /* 4*/ MatMultAdd_SeqBAIJ_N, 2382 MatMultTranspose_SeqBAIJ, 2383 MatMultTransposeAdd_SeqBAIJ, 2384 MatSolve_SeqBAIJ_N, 2385 0, 2386 0, 2387 /*10*/ 0, 2388 MatLUFactor_SeqBAIJ, 2389 0, 2390 0, 2391 MatTranspose_SeqBAIJ, 2392 /*15*/ MatGetInfo_SeqBAIJ, 2393 MatEqual_SeqBAIJ, 2394 MatGetDiagonal_SeqBAIJ, 2395 MatDiagonalScale_SeqBAIJ, 2396 MatNorm_SeqBAIJ, 2397 /*20*/ 0, 2398 MatAssemblyEnd_SeqBAIJ, 2399 0, 2400 MatSetOption_SeqBAIJ, 2401 MatZeroEntries_SeqBAIJ, 2402 /*25*/ MatZeroRows_SeqBAIJ, 2403 MatLUFactorSymbolic_SeqBAIJ, 2404 MatLUFactorNumeric_SeqBAIJ_N, 2405 MatCholeskyFactorSymbolic_SeqBAIJ, 2406 MatCholeskyFactorNumeric_SeqBAIJ_N, 2407 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2408 MatILUFactorSymbolic_SeqBAIJ, 2409 MatICCFactorSymbolic_SeqBAIJ, 2410 MatGetArray_SeqBAIJ, 2411 MatRestoreArray_SeqBAIJ, 2412 /*35*/ MatDuplicate_SeqBAIJ, 2413 0, 2414 0, 2415 MatILUFactor_SeqBAIJ, 2416 0, 2417 /*40*/ MatAXPY_SeqBAIJ, 2418 MatGetSubMatrices_SeqBAIJ, 2419 MatIncreaseOverlap_SeqBAIJ, 2420 MatGetValues_SeqBAIJ, 2421 MatCopy_SeqBAIJ, 2422 /*45*/ 0, 2423 MatScale_SeqBAIJ, 2424 0, 2425 0, 2426 0, 2427 /*50*/ 0, 2428 MatGetRowIJ_SeqBAIJ, 2429 MatRestoreRowIJ_SeqBAIJ, 2430 0, 2431 0, 2432 /*55*/ 0, 2433 0, 2434 0, 2435 0, 2436 MatSetValuesBlocked_SeqBAIJ, 2437 /*60*/ MatGetSubMatrix_SeqBAIJ, 2438 MatDestroy_SeqBAIJ, 2439 MatView_SeqBAIJ, 2440 0, 2441 0, 2442 /*65*/ 0, 2443 0, 2444 0, 2445 0, 2446 0, 2447 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2448 MatConvert_Basic, 2449 0, 2450 0, 2451 0, 2452 /*75*/ 0, 2453 0, 2454 0, 2455 0, 2456 0, 2457 /*80*/ 0, 2458 0, 2459 0, 2460 0, 2461 MatLoad_SeqBAIJ, 2462 /*85*/ 0, 2463 0, 2464 0, 2465 0, 2466 0, 2467 /*90*/ 0, 2468 0, 2469 0, 2470 0, 2471 0, 2472 /*95*/ 0, 2473 0, 2474 0, 2475 0, 2476 0, 2477 /*100*/0, 2478 0, 2479 0, 2480 0, 2481 0, 2482 /*105*/0, 2483 MatRealPart_SeqBAIJ, 2484 MatImaginaryPart_SeqBAIJ, 2485 0, 2486 0, 2487 /*110*/0, 2488 0, 2489 0, 2490 0, 2491 MatMissingDiagonal_SeqBAIJ 2492 /*115*/ 2493 }; 2494 2495 EXTERN_C_BEGIN 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2498 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2499 { 2500 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2501 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2502 PetscErrorCode ierr; 2503 2504 PetscFunctionBegin; 2505 if (aij->nonew != 1) { 2506 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2507 } 2508 2509 /* allocate space for values if not already there */ 2510 if (!aij->saved_values) { 2511 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2512 } 2513 2514 /* copy values over */ 2515 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 EXTERN_C_END 2519 2520 EXTERN_C_BEGIN 2521 #undef __FUNCT__ 2522 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2523 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2524 { 2525 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2526 PetscErrorCode ierr; 2527 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2528 2529 PetscFunctionBegin; 2530 if (aij->nonew != 1) { 2531 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2532 } 2533 if (!aij->saved_values) { 2534 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2535 } 2536 2537 /* copy values over */ 2538 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 EXTERN_C_END 2542 2543 EXTERN_C_BEGIN 2544 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2545 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2546 EXTERN_C_END 2547 2548 EXTERN_C_BEGIN 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2551 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2552 { 2553 Mat_SeqBAIJ *b; 2554 PetscErrorCode ierr; 2555 PetscInt i,mbs,nbs,bs2,newbs = bs; 2556 PetscTruth flg,skipallocation = PETSC_FALSE; 2557 2558 PetscFunctionBegin; 2559 2560 if (nz == MAT_SKIP_ALLOCATION) { 2561 skipallocation = PETSC_TRUE; 2562 nz = 0; 2563 } 2564 2565 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2566 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2567 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2568 2569 if (nnz && newbs != bs) { 2570 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2571 } 2572 bs = newbs; 2573 2574 B->rmap.bs = B->cmap.bs = bs; 2575 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2576 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2577 2578 B->preallocated = PETSC_TRUE; 2579 2580 mbs = B->rmap.n/bs; 2581 nbs = B->cmap.n/bs; 2582 bs2 = bs*bs; 2583 2584 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2585 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2586 } 2587 2588 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2589 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2590 if (nnz) { 2591 for (i=0; i<mbs; i++) { 2592 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2593 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2594 } 2595 } 2596 2597 b = (Mat_SeqBAIJ*)B->data; 2598 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2599 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2600 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2601 2602 B->ops->solve = MatSolve_SeqBAIJ_Update; 2603 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2604 if (!flg) { 2605 switch (bs) { 2606 case 1: 2607 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2608 B->ops->mult = MatMult_SeqBAIJ_1; 2609 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2610 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2611 break; 2612 case 2: 2613 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2614 B->ops->mult = MatMult_SeqBAIJ_2; 2615 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2616 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2617 break; 2618 case 3: 2619 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2620 B->ops->mult = MatMult_SeqBAIJ_3; 2621 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2622 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2623 break; 2624 case 4: 2625 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2626 B->ops->mult = MatMult_SeqBAIJ_4; 2627 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2628 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2629 break; 2630 case 5: 2631 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2632 B->ops->mult = MatMult_SeqBAIJ_5; 2633 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2634 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2635 break; 2636 case 6: 2637 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2638 B->ops->mult = MatMult_SeqBAIJ_6; 2639 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2640 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2641 break; 2642 case 7: 2643 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2644 B->ops->mult = MatMult_SeqBAIJ_7; 2645 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2646 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2647 break; 2648 default: 2649 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2650 B->ops->mult = MatMult_SeqBAIJ_N; 2651 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2652 break; 2653 } 2654 } 2655 B->rmap.bs = bs; 2656 b->mbs = mbs; 2657 b->nbs = nbs; 2658 if (!skipallocation) { 2659 if (!b->imax) { 2660 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2661 } 2662 /* b->ilen will count nonzeros in each block row so far. */ 2663 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2664 if (!nnz) { 2665 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2666 else if (nz <= 0) nz = 1; 2667 for (i=0; i<mbs; i++) b->imax[i] = nz; 2668 nz = nz*mbs; 2669 } else { 2670 nz = 0; 2671 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2672 } 2673 2674 /* allocate the matrix space */ 2675 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2676 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2677 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2678 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2679 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2680 b->singlemalloc = PETSC_TRUE; 2681 b->i[0] = 0; 2682 for (i=1; i<mbs+1; i++) { 2683 b->i[i] = b->i[i-1] + b->imax[i-1]; 2684 } 2685 b->free_a = PETSC_TRUE; 2686 b->free_ij = PETSC_TRUE; 2687 } else { 2688 b->free_a = PETSC_FALSE; 2689 b->free_ij = PETSC_FALSE; 2690 } 2691 2692 B->rmap.bs = bs; 2693 b->bs2 = bs2; 2694 b->mbs = mbs; 2695 b->nz = 0; 2696 b->maxnz = nz*bs2; 2697 B->info.nz_unneeded = (PetscReal)b->maxnz; 2698 PetscFunctionReturn(0); 2699 } 2700 EXTERN_C_END 2701 2702 /*MC 2703 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2704 block sparse compressed row format. 2705 2706 Options Database Keys: 2707 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2708 2709 Level: beginner 2710 2711 .seealso: MatCreateSeqBAIJ() 2712 M*/ 2713 2714 EXTERN_C_BEGIN 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "MatCreate_SeqBAIJ" 2717 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2718 { 2719 PetscErrorCode ierr; 2720 PetscMPIInt size; 2721 Mat_SeqBAIJ *b; 2722 2723 PetscFunctionBegin; 2724 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2725 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2726 2727 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2728 B->data = (void*)b; 2729 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2730 B->factor = 0; 2731 B->mapping = 0; 2732 b->row = 0; 2733 b->col = 0; 2734 b->icol = 0; 2735 b->reallocs = 0; 2736 b->saved_values = 0; 2737 2738 b->roworiented = PETSC_TRUE; 2739 b->nonew = 0; 2740 b->diag = 0; 2741 b->solve_work = 0; 2742 b->mult_work = 0; 2743 B->spptr = 0; 2744 B->info.nz_unneeded = (PetscReal)b->maxnz; 2745 b->keepzeroedrows = PETSC_FALSE; 2746 b->xtoy = 0; 2747 b->XtoY = 0; 2748 b->compressedrow.use = PETSC_FALSE; 2749 b->compressedrow.nrows = 0; 2750 b->compressedrow.i = PETSC_NULL; 2751 b->compressedrow.rindex = PETSC_NULL; 2752 b->compressedrow.checked = PETSC_FALSE; 2753 B->same_nonzero = PETSC_FALSE; 2754 2755 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2756 "MatInvertBlockDiagonal_SeqBAIJ", 2757 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2758 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2759 "MatStoreValues_SeqBAIJ", 2760 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2761 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2762 "MatRetrieveValues_SeqBAIJ", 2763 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2764 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2765 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2766 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2767 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2768 "MatConvert_SeqBAIJ_SeqAIJ", 2769 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2770 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2771 "MatConvert_SeqBAIJ_SeqSBAIJ", 2772 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2773 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2774 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2775 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2776 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 EXTERN_C_END 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2783 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2784 { 2785 Mat C; 2786 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2787 PetscErrorCode ierr; 2788 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2789 2790 PetscFunctionBegin; 2791 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2792 2793 *B = 0; 2794 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2795 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2796 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2797 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2798 c = (Mat_SeqBAIJ*)C->data; 2799 2800 C->rmap.N = A->rmap.N; 2801 C->cmap.N = A->cmap.N; 2802 C->rmap.bs = A->rmap.bs; 2803 c->bs2 = a->bs2; 2804 c->mbs = a->mbs; 2805 c->nbs = a->nbs; 2806 2807 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2808 for (i=0; i<mbs; i++) { 2809 c->imax[i] = a->imax[i]; 2810 c->ilen[i] = a->ilen[i]; 2811 } 2812 2813 /* allocate the matrix space */ 2814 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2815 c->singlemalloc = PETSC_TRUE; 2816 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2817 if (mbs > 0) { 2818 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2819 if (cpvalues == MAT_COPY_VALUES) { 2820 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2821 } else { 2822 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2823 } 2824 } 2825 c->roworiented = a->roworiented; 2826 c->nonew = a->nonew; 2827 2828 if (a->diag) { 2829 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2830 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2831 for (i=0; i<mbs; i++) { 2832 c->diag[i] = a->diag[i]; 2833 } 2834 } else c->diag = 0; 2835 c->nz = a->nz; 2836 c->maxnz = a->maxnz; 2837 c->solve_work = 0; 2838 c->mult_work = 0; 2839 c->free_a = PETSC_TRUE; 2840 c->free_ij = PETSC_TRUE; 2841 C->preallocated = PETSC_TRUE; 2842 C->assembled = PETSC_TRUE; 2843 2844 c->compressedrow.use = a->compressedrow.use; 2845 c->compressedrow.nrows = a->compressedrow.nrows; 2846 c->compressedrow.checked = a->compressedrow.checked; 2847 if ( a->compressedrow.checked && a->compressedrow.use){ 2848 i = a->compressedrow.nrows; 2849 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2850 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2851 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2852 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2853 } else { 2854 c->compressedrow.use = PETSC_FALSE; 2855 c->compressedrow.i = PETSC_NULL; 2856 c->compressedrow.rindex = PETSC_NULL; 2857 } 2858 C->same_nonzero = A->same_nonzero; 2859 *B = C; 2860 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2861 PetscFunctionReturn(0); 2862 } 2863 2864 #undef __FUNCT__ 2865 #define __FUNCT__ "MatLoad_SeqBAIJ" 2866 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2867 { 2868 Mat_SeqBAIJ *a; 2869 Mat B; 2870 PetscErrorCode ierr; 2871 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2872 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2873 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2874 PetscInt *masked,nmask,tmp,bs2,ishift; 2875 PetscMPIInt size; 2876 int fd; 2877 PetscScalar *aa; 2878 MPI_Comm comm = ((PetscObject)viewer)->comm; 2879 2880 PetscFunctionBegin; 2881 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2882 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2883 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2884 bs2 = bs*bs; 2885 2886 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2887 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2888 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2889 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2890 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2891 M = header[1]; N = header[2]; nz = header[3]; 2892 2893 if (header[3] < 0) { 2894 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2895 } 2896 2897 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2898 2899 /* 2900 This code adds extra rows to make sure the number of rows is 2901 divisible by the blocksize 2902 */ 2903 mbs = M/bs; 2904 extra_rows = bs - M + bs*(mbs); 2905 if (extra_rows == bs) extra_rows = 0; 2906 else mbs++; 2907 if (extra_rows) { 2908 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2909 } 2910 2911 /* read in row lengths */ 2912 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2913 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2914 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2915 2916 /* read in column indices */ 2917 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2918 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2919 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2920 2921 /* loop over row lengths determining block row lengths */ 2922 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2923 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2924 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2925 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2926 masked = mask + mbs; 2927 rowcount = 0; nzcount = 0; 2928 for (i=0; i<mbs; i++) { 2929 nmask = 0; 2930 for (j=0; j<bs; j++) { 2931 kmax = rowlengths[rowcount]; 2932 for (k=0; k<kmax; k++) { 2933 tmp = jj[nzcount++]/bs; 2934 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2935 } 2936 rowcount++; 2937 } 2938 browlengths[i] += nmask; 2939 /* zero out the mask elements we set */ 2940 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2941 } 2942 2943 /* create our matrix */ 2944 ierr = MatCreate(comm,&B); 2945 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2946 ierr = MatSetType(B,type);CHKERRQ(ierr); 2947 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2948 a = (Mat_SeqBAIJ*)B->data; 2949 2950 /* set matrix "i" values */ 2951 a->i[0] = 0; 2952 for (i=1; i<= mbs; i++) { 2953 a->i[i] = a->i[i-1] + browlengths[i-1]; 2954 a->ilen[i-1] = browlengths[i-1]; 2955 } 2956 a->nz = 0; 2957 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2958 2959 /* read in nonzero values */ 2960 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2961 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2962 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2963 2964 /* set "a" and "j" values into matrix */ 2965 nzcount = 0; jcount = 0; 2966 for (i=0; i<mbs; i++) { 2967 nzcountb = nzcount; 2968 nmask = 0; 2969 for (j=0; j<bs; j++) { 2970 kmax = rowlengths[i*bs+j]; 2971 for (k=0; k<kmax; k++) { 2972 tmp = jj[nzcount++]/bs; 2973 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2974 } 2975 } 2976 /* sort the masked values */ 2977 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2978 2979 /* set "j" values into matrix */ 2980 maskcount = 1; 2981 for (j=0; j<nmask; j++) { 2982 a->j[jcount++] = masked[j]; 2983 mask[masked[j]] = maskcount++; 2984 } 2985 /* set "a" values into matrix */ 2986 ishift = bs2*a->i[i]; 2987 for (j=0; j<bs; j++) { 2988 kmax = rowlengths[i*bs+j]; 2989 for (k=0; k<kmax; k++) { 2990 tmp = jj[nzcountb]/bs ; 2991 block = mask[tmp] - 1; 2992 point = jj[nzcountb] - bs*tmp; 2993 idx = ishift + bs2*block + j + bs*point; 2994 a->a[idx] = (MatScalar)aa[nzcountb++]; 2995 } 2996 } 2997 /* zero out the mask elements we set */ 2998 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2999 } 3000 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3001 3002 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3003 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3004 ierr = PetscFree(aa);CHKERRQ(ierr); 3005 ierr = PetscFree(jj);CHKERRQ(ierr); 3006 ierr = PetscFree(mask);CHKERRQ(ierr); 3007 3008 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3009 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3010 ierr = MatView_Private(B);CHKERRQ(ierr); 3011 3012 *A = B; 3013 PetscFunctionReturn(0); 3014 } 3015 3016 #undef __FUNCT__ 3017 #define __FUNCT__ "MatCreateSeqBAIJ" 3018 /*@C 3019 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3020 compressed row) format. For good matrix assembly performance the 3021 user should preallocate the matrix storage by setting the parameter nz 3022 (or the array nnz). By setting these parameters accurately, performance 3023 during matrix assembly can be increased by more than a factor of 50. 3024 3025 Collective on MPI_Comm 3026 3027 Input Parameters: 3028 + comm - MPI communicator, set to PETSC_COMM_SELF 3029 . bs - size of block 3030 . m - number of rows 3031 . n - number of columns 3032 . nz - number of nonzero blocks per block row (same for all rows) 3033 - nnz - array containing the number of nonzero blocks in the various block rows 3034 (possibly different for each block row) or PETSC_NULL 3035 3036 Output Parameter: 3037 . A - the matrix 3038 3039 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3040 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3041 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3042 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3043 3044 Options Database Keys: 3045 . -mat_no_unroll - uses code that does not unroll the loops in the 3046 block calculations (much slower) 3047 . -mat_block_size - size of the blocks to use 3048 3049 Level: intermediate 3050 3051 Notes: 3052 The number of rows and columns must be divisible by blocksize. 3053 3054 If the nnz parameter is given then the nz parameter is ignored 3055 3056 A nonzero block is any block that as 1 or more nonzeros in it 3057 3058 The block AIJ format is fully compatible with standard Fortran 77 3059 storage. That is, the stored row and column indices can begin at 3060 either one (as in Fortran) or zero. See the users' manual for details. 3061 3062 Specify the preallocated storage with either nz or nnz (not both). 3063 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3064 allocation. For additional details, see the users manual chapter on 3065 matrices. 3066 3067 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3068 @*/ 3069 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3070 { 3071 PetscErrorCode ierr; 3072 3073 PetscFunctionBegin; 3074 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3075 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3076 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3077 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 #undef __FUNCT__ 3082 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3083 /*@C 3084 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3085 per row in the matrix. For good matrix assembly performance the 3086 user should preallocate the matrix storage by setting the parameter nz 3087 (or the array nnz). By setting these parameters accurately, performance 3088 during matrix assembly can be increased by more than a factor of 50. 3089 3090 Collective on MPI_Comm 3091 3092 Input Parameters: 3093 + A - the matrix 3094 . bs - size of block 3095 . nz - number of block nonzeros per block row (same for all rows) 3096 - nnz - array containing the number of block nonzeros in the various block rows 3097 (possibly different for each block row) or PETSC_NULL 3098 3099 Options Database Keys: 3100 . -mat_no_unroll - uses code that does not unroll the loops in the 3101 block calculations (much slower) 3102 . -mat_block_size - size of the blocks to use 3103 3104 Level: intermediate 3105 3106 Notes: 3107 If the nnz parameter is given then the nz parameter is ignored 3108 3109 You can call MatGetInfo() to get information on how effective the preallocation was; 3110 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3111 You can also run with the option -info and look for messages with the string 3112 malloc in them to see if additional memory allocation was needed. 3113 3114 The block AIJ format is fully compatible with standard Fortran 77 3115 storage. That is, the stored row and column indices can begin at 3116 either one (as in Fortran) or zero. See the users' manual for details. 3117 3118 Specify the preallocated storage with either nz or nnz (not both). 3119 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3120 allocation. For additional details, see the users manual chapter on 3121 matrices. 3122 3123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3124 @*/ 3125 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3126 { 3127 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3128 3129 PetscFunctionBegin; 3130 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3131 if (f) { 3132 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3133 } 3134 PetscFunctionReturn(0); 3135 } 3136 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3139 /*@ 3140 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3141 (upper triangular entries in CSR format) provided by the user. 3142 3143 Collective on MPI_Comm 3144 3145 Input Parameters: 3146 + comm - must be an MPI communicator of size 1 3147 . bs - size of block 3148 . m - number of rows 3149 . n - number of columns 3150 . i - row indices 3151 . j - column indices 3152 - a - matrix values 3153 3154 Output Parameter: 3155 . mat - the matrix 3156 3157 Level: intermediate 3158 3159 Notes: 3160 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3161 once the matrix is destroyed 3162 3163 You cannot set new nonzero locations into this matrix, that will generate an error. 3164 3165 The i and j indices are 0 based 3166 3167 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3168 3169 @*/ 3170 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3171 { 3172 PetscErrorCode ierr; 3173 PetscInt ii; 3174 Mat_SeqBAIJ *baij; 3175 3176 PetscFunctionBegin; 3177 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3178 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3179 3180 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3181 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3182 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3183 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3184 baij = (Mat_SeqBAIJ*)(*mat)->data; 3185 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3186 3187 baij->i = i; 3188 baij->j = j; 3189 baij->a = a; 3190 baij->singlemalloc = PETSC_FALSE; 3191 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3192 baij->free_a = PETSC_FALSE; 3193 baij->free_ij = PETSC_FALSE; 3194 3195 for (ii=0; ii<m; ii++) { 3196 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3197 #if defined(PETSC_USE_DEBUG) 3198 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3199 #endif 3200 } 3201 #if defined(PETSC_USE_DEBUG) 3202 for (ii=0; ii<baij->i[m]; ii++) { 3203 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3204 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3205 } 3206 #endif 3207 3208 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3210 PetscFunctionReturn(0); 3211 } 3212