1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 118 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,s1; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0]; 146 i2 = 1; 147 idiag += 1; 148 for (i=1; i<m; i++) { 149 v = aa + ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; 153 while (nz--) { 154 idx = (*vi++); 155 x1 = x[idx]; 156 s1 -= v[0]*x1; 157 v += 1; 158 } 159 x[i2] = idiag[0]*s1; 160 idiag += 1; 161 i2 += 1; 162 } 163 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 164 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 165 } 166 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 167 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 168 i2 = 0; 169 mdiag = a->idiag+a->mbs; 170 for (i=0; i<m; i++) { 171 x1 = x[i2]; 172 x[i2] = mdiag[0]*x1; 173 mdiag += 1; 174 i2 += 1; 175 } 176 ierr = PetscLogFlops(m);CHKERRQ(ierr); 177 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 178 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 181 idiag = a->idiag+a->mbs - 1; 182 i2 = m - 1; 183 x1 = x[i2]; 184 x[i2] = idiag[0]*x1; 185 idiag -= 1; 186 i2 -= 1; 187 for (i=m-2; i>=0; i--) { 188 v = aa + (diag[i]+1); 189 vi = aj + diag[i] + 1; 190 nz = ai[i+1] - diag[i] - 1; 191 s1 = x[i2]; 192 while (nz--) { 193 idx = (*vi++); 194 x1 = x[idx]; 195 s1 -= v[0]*x1; 196 v += 1; 197 } 198 x[i2] = idiag[0]*s1; 199 idiag -= 2; 200 i2 -= 1; 201 } 202 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 203 } 204 } else { 205 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 206 } 207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 214 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 215 { 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 217 PetscScalar *x,x1,x2,s1,s2; 218 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 219 PetscErrorCode ierr; 220 PetscInt m = a->mbs,i,i2,nz,idx; 221 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 222 223 PetscFunctionBegin; 224 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 225 its = its*lits; 226 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 227 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 228 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 229 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 230 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 231 232 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 233 234 diag = a->diag; 235 idiag = a->idiag; 236 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 237 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 241 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 242 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 243 i2 = 2; 244 idiag += 4; 245 for (i=1; i<m; i++) { 246 v = aa + 4*ai[i]; 247 vi = aj + ai[i]; 248 nz = diag[i] - ai[i]; 249 s1 = b[i2]; s2 = b[i2+1]; 250 while (nz--) { 251 idx = 2*(*vi++); 252 x1 = x[idx]; x2 = x[1+idx]; 253 s1 -= v[0]*x1 + v[2]*x2; 254 s2 -= v[1]*x1 + v[3]*x2; 255 v += 4; 256 } 257 x[i2] = idiag[0]*s1 + idiag[2]*s2; 258 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 259 idiag += 4; 260 i2 += 2; 261 } 262 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 263 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 264 } 265 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 266 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 267 i2 = 0; 268 mdiag = a->idiag+4*a->mbs; 269 for (i=0; i<m; i++) { 270 x1 = x[i2]; x2 = x[i2+1]; 271 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 272 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 273 mdiag += 4; 274 i2 += 2; 275 } 276 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 277 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 278 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 279 } 280 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 281 idiag = a->idiag+4*a->mbs - 4; 282 i2 = 2*m - 2; 283 x1 = x[i2]; x2 = x[i2+1]; 284 x[i2] = idiag[0]*x1 + idiag[2]*x2; 285 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 286 idiag -= 4; 287 i2 -= 2; 288 for (i=m-2; i>=0; i--) { 289 v = aa + 4*(diag[i]+1); 290 vi = aj + diag[i] + 1; 291 nz = ai[i+1] - diag[i] - 1; 292 s1 = x[i2]; s2 = x[i2+1]; 293 while (nz--) { 294 idx = 2*(*vi++); 295 x1 = x[idx]; x2 = x[1+idx]; 296 s1 -= v[0]*x1 + v[2]*x2; 297 s2 -= v[1]*x1 + v[3]*x2; 298 v += 4; 299 } 300 x[i2] = idiag[0]*s1 + idiag[2]*s2; 301 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 302 idiag -= 4; 303 i2 -= 2; 304 } 305 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 306 } 307 } else { 308 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 309 } 310 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 311 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 317 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 318 { 319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 320 PetscScalar *x,x1,x2,x3,s1,s2,s3; 321 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 322 PetscErrorCode ierr; 323 PetscInt m = a->mbs,i,i2,nz,idx; 324 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 325 326 PetscFunctionBegin; 327 its = its*lits; 328 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 329 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 330 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 331 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 332 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 333 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 334 335 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 336 337 diag = a->diag; 338 idiag = a->idiag; 339 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 341 342 if (flag & SOR_ZERO_INITIAL_GUESS) { 343 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 344 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 345 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 346 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 347 i2 = 3; 348 idiag += 9; 349 for (i=1; i<m; i++) { 350 v = aa + 9*ai[i]; 351 vi = aj + ai[i]; 352 nz = diag[i] - ai[i]; 353 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 354 while (nz--) { 355 idx = 3*(*vi++); 356 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 357 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 358 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 359 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 360 v += 9; 361 } 362 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 363 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 364 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 365 idiag += 9; 366 i2 += 3; 367 } 368 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 369 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 370 } 371 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 372 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 373 i2 = 0; 374 mdiag = a->idiag+9*a->mbs; 375 for (i=0; i<m; i++) { 376 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 377 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 378 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 379 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 380 mdiag += 9; 381 i2 += 3; 382 } 383 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 384 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 385 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 386 } 387 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 388 idiag = a->idiag+9*a->mbs - 9; 389 i2 = 3*m - 3; 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 391 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 392 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 393 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 394 idiag -= 9; 395 i2 -= 3; 396 for (i=m-2; i>=0; i--) { 397 v = aa + 9*(diag[i]+1); 398 vi = aj + diag[i] + 1; 399 nz = ai[i+1] - diag[i] - 1; 400 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 401 while (nz--) { 402 idx = 3*(*vi++); 403 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 404 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 405 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 406 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 407 v += 9; 408 } 409 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 410 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 411 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 412 idiag -= 9; 413 i2 -= 3; 414 } 415 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 416 } 417 } else { 418 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 419 } 420 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 421 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 427 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 428 { 429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 430 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 431 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 432 PetscErrorCode ierr; 433 PetscInt m = a->mbs,i,i2,nz,idx; 434 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 435 436 PetscFunctionBegin; 437 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 438 its = its*lits; 439 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 440 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 441 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 442 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 443 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 444 445 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 446 447 diag = a->diag; 448 idiag = a->idiag; 449 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 451 452 if (flag & SOR_ZERO_INITIAL_GUESS) { 453 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 454 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 455 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 456 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 457 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 458 i2 = 4; 459 idiag += 16; 460 for (i=1; i<m; i++) { 461 v = aa + 16*ai[i]; 462 vi = aj + ai[i]; 463 nz = diag[i] - ai[i]; 464 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 465 while (nz--) { 466 idx = 4*(*vi++); 467 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 468 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 469 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 470 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 471 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 472 v += 16; 473 } 474 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 475 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 476 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 477 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 478 idiag += 16; 479 i2 += 4; 480 } 481 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 482 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 483 } 484 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 485 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 486 i2 = 0; 487 mdiag = a->idiag+16*a->mbs; 488 for (i=0; i<m; i++) { 489 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 490 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 491 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 492 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 493 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 494 mdiag += 16; 495 i2 += 4; 496 } 497 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 498 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 499 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 500 } 501 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 502 idiag = a->idiag+16*a->mbs - 16; 503 i2 = 4*m - 4; 504 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 505 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 506 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 507 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 508 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 509 idiag -= 16; 510 i2 -= 4; 511 for (i=m-2; i>=0; i--) { 512 v = aa + 16*(diag[i]+1); 513 vi = aj + diag[i] + 1; 514 nz = ai[i+1] - diag[i] - 1; 515 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 516 while (nz--) { 517 idx = 4*(*vi++); 518 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 519 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 520 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 521 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 522 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 523 v += 16; 524 } 525 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 526 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 527 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 528 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 529 idiag -= 16; 530 i2 -= 4; 531 } 532 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 533 } 534 } else { 535 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 536 } 537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 538 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 544 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 545 { 546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 547 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 548 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 549 PetscErrorCode ierr; 550 PetscInt m = a->mbs,i,i2,nz,idx; 551 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 552 553 PetscFunctionBegin; 554 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 555 its = its*lits; 556 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 557 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 558 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 559 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 560 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 561 562 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 563 564 diag = a->diag; 565 idiag = a->idiag; 566 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 567 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 568 569 if (flag & SOR_ZERO_INITIAL_GUESS) { 570 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 571 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 572 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 573 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 574 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 575 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 576 i2 = 5; 577 idiag += 25; 578 for (i=1; i<m; i++) { 579 v = aa + 25*ai[i]; 580 vi = aj + ai[i]; 581 nz = diag[i] - ai[i]; 582 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 583 while (nz--) { 584 idx = 5*(*vi++); 585 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 586 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 587 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 588 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 589 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 590 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 591 v += 25; 592 } 593 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 594 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 595 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 596 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 597 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 598 idiag += 25; 599 i2 += 5; 600 } 601 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 602 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 603 } 604 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 605 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 606 i2 = 0; 607 mdiag = a->idiag+25*a->mbs; 608 for (i=0; i<m; i++) { 609 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 610 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 611 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 612 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 613 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 614 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 615 mdiag += 25; 616 i2 += 5; 617 } 618 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 619 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 620 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 621 } 622 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 623 idiag = a->idiag+25*a->mbs - 25; 624 i2 = 5*m - 5; 625 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 626 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 627 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 628 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 629 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 630 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 631 idiag -= 25; 632 i2 -= 5; 633 for (i=m-2; i>=0; i--) { 634 v = aa + 25*(diag[i]+1); 635 vi = aj + diag[i] + 1; 636 nz = ai[i+1] - diag[i] - 1; 637 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 638 while (nz--) { 639 idx = 5*(*vi++); 640 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 641 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 642 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 643 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 644 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 645 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 646 v += 25; 647 } 648 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 649 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 650 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 651 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 652 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 653 idiag -= 25; 654 i2 -= 5; 655 } 656 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 657 } 658 } else { 659 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 660 } 661 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 662 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 668 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 669 { 670 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 671 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 672 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 673 PetscErrorCode ierr; 674 PetscInt m = a->mbs,i,i2,nz,idx; 675 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 676 677 PetscFunctionBegin; 678 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 679 its = its*lits; 680 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 681 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 682 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 683 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 684 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 685 686 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 687 688 diag = a->diag; 689 idiag = a->idiag; 690 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 691 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 692 693 if (flag & SOR_ZERO_INITIAL_GUESS) { 694 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 695 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 696 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 697 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 698 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 699 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 700 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 701 i2 = 6; 702 idiag += 36; 703 for (i=1; i<m; i++) { 704 v = aa + 36*ai[i]; 705 vi = aj + ai[i]; 706 nz = diag[i] - ai[i]; 707 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 708 while (nz--) { 709 idx = 6*(*vi++); 710 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 711 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 712 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 713 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 714 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 715 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 716 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 717 v += 36; 718 } 719 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 720 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 721 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 722 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 723 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 724 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 725 idiag += 36; 726 i2 += 6; 727 } 728 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 729 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 730 } 731 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 732 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 733 i2 = 0; 734 mdiag = a->idiag+36*a->mbs; 735 for (i=0; i<m; i++) { 736 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 737 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 738 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 739 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 740 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 741 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 742 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 743 mdiag += 36; 744 i2 += 6; 745 } 746 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 747 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 748 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 749 } 750 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 751 idiag = a->idiag+36*a->mbs - 36; 752 i2 = 6*m - 6; 753 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 754 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 755 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 756 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 757 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 758 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 759 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 760 idiag -= 36; 761 i2 -= 6; 762 for (i=m-2; i>=0; i--) { 763 v = aa + 36*(diag[i]+1); 764 vi = aj + diag[i] + 1; 765 nz = ai[i+1] - diag[i] - 1; 766 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 767 while (nz--) { 768 idx = 6*(*vi++); 769 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 770 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 771 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 772 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 773 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 774 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 775 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 776 v += 36; 777 } 778 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 779 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 780 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 781 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 782 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 783 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 784 idiag -= 36; 785 i2 -= 6; 786 } 787 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 788 } 789 } else { 790 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 791 } 792 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 793 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 799 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 800 { 801 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 802 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 803 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 804 PetscErrorCode ierr; 805 PetscInt m = a->mbs,i,i2,nz,idx; 806 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 807 808 PetscFunctionBegin; 809 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 810 its = its*lits; 811 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 812 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 813 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 814 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 815 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 816 817 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 818 819 diag = a->diag; 820 idiag = a->idiag; 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 823 824 if (flag & SOR_ZERO_INITIAL_GUESS) { 825 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 826 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 827 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 828 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 829 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 830 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 831 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 832 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 833 i2 = 7; 834 idiag += 49; 835 for (i=1; i<m; i++) { 836 v = aa + 49*ai[i]; 837 vi = aj + ai[i]; 838 nz = diag[i] - ai[i]; 839 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 843 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 844 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 845 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 846 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 847 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 848 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 849 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 850 v += 49; 851 } 852 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 853 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 854 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 855 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 856 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 857 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 858 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 859 idiag += 49; 860 i2 += 7; 861 } 862 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 863 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 864 } 865 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 866 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 867 i2 = 0; 868 mdiag = a->idiag+49*a->mbs; 869 for (i=0; i<m; i++) { 870 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 871 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 872 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 873 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 874 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 875 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 876 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 877 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 878 mdiag += 36; 879 i2 += 6; 880 } 881 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 882 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 883 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 884 } 885 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 886 idiag = a->idiag+49*a->mbs - 49; 887 i2 = 7*m - 7; 888 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 889 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 890 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 891 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 892 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 893 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 894 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 895 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 896 idiag -= 49; 897 i2 -= 7; 898 for (i=m-2; i>=0; i--) { 899 v = aa + 49*(diag[i]+1); 900 vi = aj + diag[i] + 1; 901 nz = ai[i+1] - diag[i] - 1; 902 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 903 while (nz--) { 904 idx = 7*(*vi++); 905 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 906 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 907 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 908 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 909 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 910 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 911 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 912 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 913 v += 49; 914 } 915 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 916 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 917 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 918 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 919 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 920 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 921 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 922 idiag -= 49; 923 i2 -= 7; 924 } 925 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 926 } 927 } else { 928 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 929 } 930 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 931 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 935 /* 936 Special version for direct calls from Fortran (Used in PETSc-fun3d) 937 */ 938 #if defined(PETSC_HAVE_FORTRAN_CAPS) 939 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 940 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 941 #define matsetvaluesblocked4_ matsetvaluesblocked4 942 #endif 943 944 EXTERN_C_BEGIN 945 #undef __FUNCT__ 946 #define __FUNCT__ "matsetvaluesblocked4_" 947 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 948 { 949 Mat A = *AA; 950 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 951 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 952 PetscInt *ai=a->i,*ailen=a->ilen; 953 PetscInt *aj=a->j,stepval,lastcol = -1; 954 const PetscScalar *value = v; 955 MatScalar *ap,*aa = a->a,*bap; 956 957 PetscFunctionBegin; 958 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 959 stepval = (n-1)*4; 960 for (k=0; k<m; k++) { /* loop over added rows */ 961 row = im[k]; 962 rp = aj + ai[row]; 963 ap = aa + 16*ai[row]; 964 nrow = ailen[row]; 965 low = 0; 966 high = nrow; 967 for (l=0; l<n; l++) { /* loop over added columns */ 968 col = in[l]; 969 if (col <= lastcol) low = 0; else high = nrow; 970 lastcol = col; 971 value = v + k*(stepval+4 + l)*4; 972 while (high-low > 7) { 973 t = (low+high)/2; 974 if (rp[t] > col) high = t; 975 else low = t; 976 } 977 for (i=low; i<high; i++) { 978 if (rp[i] > col) break; 979 if (rp[i] == col) { 980 bap = ap + 16*i; 981 for (ii=0; ii<4; ii++,value+=stepval) { 982 for (jj=ii; jj<16; jj+=4) { 983 bap[jj] += *value++; 984 } 985 } 986 goto noinsert2; 987 } 988 } 989 N = nrow++ - 1; 990 high++; /* added new column index thus must search to one higher than before */ 991 /* shift up all the later entries in this row */ 992 for (ii=N; ii>=i; ii--) { 993 rp[ii+1] = rp[ii]; 994 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 995 } 996 if (N >= i) { 997 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 998 } 999 rp[i] = col; 1000 bap = ap + 16*i; 1001 for (ii=0; ii<4; ii++,value+=stepval) { 1002 for (jj=ii; jj<16; jj+=4) { 1003 bap[jj] = *value++; 1004 } 1005 } 1006 noinsert2:; 1007 low = i; 1008 } 1009 ailen[row] = nrow; 1010 } 1011 PetscFunctionReturnVoid(); 1012 } 1013 EXTERN_C_END 1014 1015 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1016 #define matsetvalues4_ MATSETVALUES4 1017 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1018 #define matsetvalues4_ matsetvalues4 1019 #endif 1020 1021 EXTERN_C_BEGIN 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatSetValues4_" 1024 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1025 { 1026 Mat A = *AA; 1027 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1028 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1029 PetscInt *ai=a->i,*ailen=a->ilen; 1030 PetscInt *aj=a->j,brow,bcol; 1031 PetscInt ridx,cidx,lastcol = -1; 1032 MatScalar *ap,value,*aa=a->a,*bap; 1033 1034 PetscFunctionBegin; 1035 for (k=0; k<m; k++) { /* loop over added rows */ 1036 row = im[k]; brow = row/4; 1037 rp = aj + ai[brow]; 1038 ap = aa + 16*ai[brow]; 1039 nrow = ailen[brow]; 1040 low = 0; 1041 high = nrow; 1042 for (l=0; l<n; l++) { /* loop over added columns */ 1043 col = in[l]; bcol = col/4; 1044 ridx = row % 4; cidx = col % 4; 1045 value = v[l + k*n]; 1046 if (col <= lastcol) low = 0; else high = nrow; 1047 lastcol = col; 1048 while (high-low > 7) { 1049 t = (low+high)/2; 1050 if (rp[t] > bcol) high = t; 1051 else low = t; 1052 } 1053 for (i=low; i<high; i++) { 1054 if (rp[i] > bcol) break; 1055 if (rp[i] == bcol) { 1056 bap = ap + 16*i + 4*cidx + ridx; 1057 *bap += value; 1058 goto noinsert1; 1059 } 1060 } 1061 N = nrow++ - 1; 1062 high++; /* added new column thus must search to one higher than before */ 1063 /* shift up all the later entries in this row */ 1064 for (ii=N; ii>=i; ii--) { 1065 rp[ii+1] = rp[ii]; 1066 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1067 } 1068 if (N>=i) { 1069 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1070 } 1071 rp[i] = bcol; 1072 ap[16*i + 4*cidx + ridx] = value; 1073 noinsert1:; 1074 low = i; 1075 } 1076 ailen[brow] = nrow; 1077 } 1078 PetscFunctionReturnVoid(); 1079 } 1080 EXTERN_C_END 1081 1082 /* 1083 Checks for missing diagonals 1084 */ 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1087 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1088 { 1089 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt *diag,*jj = a->j,i; 1092 1093 PetscFunctionBegin; 1094 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1095 *missing = PETSC_FALSE; 1096 diag = a->diag; 1097 for (i=0; i<a->mbs; i++) { 1098 if (jj[diag[i]] != i) { 1099 *missing = PETSC_TRUE; 1100 if (d) *d = i; 1101 } 1102 } 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1108 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1109 { 1110 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 PetscInt i,j,m = a->mbs; 1113 1114 PetscFunctionBegin; 1115 if (!a->diag) { 1116 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1117 } 1118 for (i=0; i<m; i++) { 1119 a->diag[i] = a->i[i+1]; 1120 for (j=a->i[i]; j<a->i[i+1]; j++) { 1121 if (a->j[j] == i) { 1122 a->diag[i] = j; 1123 break; 1124 } 1125 } 1126 } 1127 PetscFunctionReturn(0); 1128 } 1129 1130 1131 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1135 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1136 { 1137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1138 PetscErrorCode ierr; 1139 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 1140 PetscInt *tia, *tja; 1141 1142 PetscFunctionBegin; 1143 *nn = n; 1144 if (!ia) PetscFunctionReturn(0); 1145 if (symmetric) { 1146 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1147 } else { 1148 tia = a->i; tja = a->j; 1149 } 1150 1151 if (!blockcompressed && bs > 1) { 1152 (*nn) *= bs; 1153 nbs = bs; 1154 /* malloc & create the natural set of indices */ 1155 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1156 if (n) { 1157 (*ia)[0] = 0; 1158 for (j=1; j<bs; j++) { 1159 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1160 } 1161 } 1162 1163 for (i=1; i<n; i++) { 1164 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1165 for (j=1; j<bs; j++) { 1166 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1167 } 1168 } 1169 if (n) { 1170 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1171 } 1172 1173 if (ja) { 1174 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1175 cnt = 0; 1176 for (i=0; i<n; i++) { 1177 for (j=0; j<bs; j++) { 1178 for (k=tia[i]; k<tia[i+1]; k++) { 1179 for (l=0; l<bs; l++) { 1180 (*ja)[cnt++] = bs*tja[k] + l; 1181 } 1182 } 1183 } 1184 } 1185 } 1186 1187 n *= bs; 1188 nz *= bs*bs; 1189 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1190 ierr = PetscFree(tia);CHKERRQ(ierr); 1191 ierr = PetscFree(tja);CHKERRQ(ierr); 1192 } 1193 } else { 1194 *ia = tia; 1195 if (ja) *ja = tja; 1196 } 1197 if (oshift == 1) { 1198 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1199 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1200 } 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1206 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1207 { 1208 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1209 PetscErrorCode ierr; 1210 PetscInt i,n = a->mbs,nz = a->i[n]; 1211 1212 PetscFunctionBegin; 1213 if (!ia) PetscFunctionReturn(0); 1214 if (!blockcompressed && A->rmap.bs > 1) { 1215 ierr = PetscFree(*ia);CHKERRQ(ierr); 1216 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1217 } else if (symmetric) { 1218 ierr = PetscFree(*ia);CHKERRQ(ierr); 1219 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1220 } else if (oshift == 1) { /* blockcompressed */ 1221 for (i=0; i<n+1; i++) a->i[i]--; 1222 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1223 } 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1229 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1230 { 1231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 #if defined(PETSC_USE_LOG) 1236 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 1237 #endif 1238 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1239 if (a->row) { 1240 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1241 } 1242 if (a->col) { 1243 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1244 } 1245 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1246 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1247 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1248 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1249 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1250 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1251 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1252 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1253 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1254 1255 ierr = PetscFree(a);CHKERRQ(ierr); 1256 1257 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1258 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1259 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1260 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1261 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1262 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1263 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1264 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1270 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 switch (op) { 1277 case MAT_ROW_ORIENTED: 1278 a->roworiented = flg; 1279 break; 1280 case MAT_KEEP_ZEROED_ROWS: 1281 a->keepzeroedrows = flg; 1282 break; 1283 case MAT_NEW_NONZERO_LOCATIONS: 1284 a->nonew = (flg ? 0 : 1); 1285 break; 1286 case MAT_NEW_NONZERO_LOCATION_ERR: 1287 a->nonew = (flg ? -1 : 0); 1288 break; 1289 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1290 a->nonew = (flg ? -2 : 0); 1291 break; 1292 case MAT_NEW_DIAGONALS: 1293 case MAT_IGNORE_OFF_PROC_ENTRIES: 1294 case MAT_USE_HASH_TABLE: 1295 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1296 break; 1297 case MAT_SYMMETRIC: 1298 case MAT_STRUCTURALLY_SYMMETRIC: 1299 case MAT_HERMITIAN: 1300 case MAT_SYMMETRY_ETERNAL: 1301 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1302 break; 1303 default: 1304 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1311 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1312 { 1313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1314 PetscErrorCode ierr; 1315 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1316 MatScalar *aa,*aa_i; 1317 PetscScalar *v_i; 1318 1319 PetscFunctionBegin; 1320 bs = A->rmap.bs; 1321 ai = a->i; 1322 aj = a->j; 1323 aa = a->a; 1324 bs2 = a->bs2; 1325 1326 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1327 1328 bn = row/bs; /* Block number */ 1329 bp = row % bs; /* Block Position */ 1330 M = ai[bn+1] - ai[bn]; 1331 *nz = bs*M; 1332 1333 if (v) { 1334 *v = 0; 1335 if (*nz) { 1336 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1337 for (i=0; i<M; i++) { /* for each block in the block row */ 1338 v_i = *v + i*bs; 1339 aa_i = aa + bs2*(ai[bn] + i); 1340 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1341 } 1342 } 1343 } 1344 1345 if (idx) { 1346 *idx = 0; 1347 if (*nz) { 1348 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1349 for (i=0; i<M; i++) { /* for each block in the block row */ 1350 idx_i = *idx + i*bs; 1351 itmp = bs*aj[ai[bn] + i]; 1352 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1353 } 1354 } 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1361 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1362 { 1363 PetscErrorCode ierr; 1364 1365 PetscFunctionBegin; 1366 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1367 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1373 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1374 { 1375 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1376 Mat C; 1377 PetscErrorCode ierr; 1378 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1379 PetscInt *rows,*cols,bs2=a->bs2; 1380 PetscScalar *array; 1381 1382 PetscFunctionBegin; 1383 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1384 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1385 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1386 1387 array = a->a; 1388 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1389 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1390 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1391 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1392 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1393 ierr = PetscFree(col);CHKERRQ(ierr); 1394 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1395 cols = rows + bs; 1396 for (i=0; i<mbs; i++) { 1397 cols[0] = i*bs; 1398 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1399 len = ai[i+1] - ai[i]; 1400 for (j=0; j<len; j++) { 1401 rows[0] = (*aj++)*bs; 1402 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1403 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1404 array += bs2; 1405 } 1406 } 1407 ierr = PetscFree(rows);CHKERRQ(ierr); 1408 1409 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1410 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 1412 if (B) { 1413 *B = C; 1414 } else { 1415 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1416 } 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1422 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1423 { 1424 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1425 PetscErrorCode ierr; 1426 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1427 int fd; 1428 PetscScalar *aa; 1429 FILE *file; 1430 1431 PetscFunctionBegin; 1432 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1433 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1434 col_lens[0] = MAT_FILE_COOKIE; 1435 1436 col_lens[1] = A->rmap.N; 1437 col_lens[2] = A->cmap.n; 1438 col_lens[3] = a->nz*bs2; 1439 1440 /* store lengths of each row and write (including header) to file */ 1441 count = 0; 1442 for (i=0; i<a->mbs; i++) { 1443 for (j=0; j<bs; j++) { 1444 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1445 } 1446 } 1447 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1448 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1449 1450 /* store column indices (zero start index) */ 1451 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1452 count = 0; 1453 for (i=0; i<a->mbs; i++) { 1454 for (j=0; j<bs; j++) { 1455 for (k=a->i[i]; k<a->i[i+1]; k++) { 1456 for (l=0; l<bs; l++) { 1457 jj[count++] = bs*a->j[k] + l; 1458 } 1459 } 1460 } 1461 } 1462 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1463 ierr = PetscFree(jj);CHKERRQ(ierr); 1464 1465 /* store nonzero values */ 1466 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1467 count = 0; 1468 for (i=0; i<a->mbs; i++) { 1469 for (j=0; j<bs; j++) { 1470 for (k=a->i[i]; k<a->i[i+1]; k++) { 1471 for (l=0; l<bs; l++) { 1472 aa[count++] = a->a[bs2*k + l*bs + j]; 1473 } 1474 } 1475 } 1476 } 1477 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1478 ierr = PetscFree(aa);CHKERRQ(ierr); 1479 1480 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1481 if (file) { 1482 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1489 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1490 { 1491 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1492 PetscErrorCode ierr; 1493 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1494 PetscViewerFormat format; 1495 1496 PetscFunctionBegin; 1497 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1498 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1499 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1500 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1501 Mat aij; 1502 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1503 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1504 ierr = MatDestroy(aij);CHKERRQ(ierr); 1505 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1506 PetscFunctionReturn(0); 1507 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1508 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1509 for (i=0; i<a->mbs; i++) { 1510 for (j=0; j<bs; j++) { 1511 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1512 for (k=a->i[i]; k<a->i[i+1]; k++) { 1513 for (l=0; l<bs; l++) { 1514 #if defined(PETSC_USE_COMPLEX) 1515 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1516 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1517 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1518 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1519 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1520 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1521 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1522 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1523 } 1524 #else 1525 if (a->a[bs2*k + l*bs + j] != 0.0) { 1526 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1527 } 1528 #endif 1529 } 1530 } 1531 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1532 } 1533 } 1534 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1535 } else { 1536 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1537 for (i=0; i<a->mbs; i++) { 1538 for (j=0; j<bs; j++) { 1539 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1540 for (k=a->i[i]; k<a->i[i+1]; k++) { 1541 for (l=0; l<bs; l++) { 1542 #if defined(PETSC_USE_COMPLEX) 1543 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1545 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1546 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1547 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1548 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1551 } 1552 #else 1553 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1554 #endif 1555 } 1556 } 1557 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1558 } 1559 } 1560 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1561 } 1562 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1568 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1569 { 1570 Mat A = (Mat) Aa; 1571 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1572 PetscErrorCode ierr; 1573 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1574 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1575 MatScalar *aa; 1576 PetscViewer viewer; 1577 1578 PetscFunctionBegin; 1579 1580 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1581 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1582 1583 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1584 1585 /* loop over matrix elements drawing boxes */ 1586 color = PETSC_DRAW_BLUE; 1587 for (i=0,row=0; i<mbs; i++,row+=bs) { 1588 for (j=a->i[i]; j<a->i[i+1]; j++) { 1589 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1590 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1591 aa = a->a + j*bs2; 1592 for (k=0; k<bs; k++) { 1593 for (l=0; l<bs; l++) { 1594 if (PetscRealPart(*aa++) >= 0.) continue; 1595 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1596 } 1597 } 1598 } 1599 } 1600 color = PETSC_DRAW_CYAN; 1601 for (i=0,row=0; i<mbs; i++,row+=bs) { 1602 for (j=a->i[i]; j<a->i[i+1]; j++) { 1603 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1604 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1605 aa = a->a + j*bs2; 1606 for (k=0; k<bs; k++) { 1607 for (l=0; l<bs; l++) { 1608 if (PetscRealPart(*aa++) != 0.) continue; 1609 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1610 } 1611 } 1612 } 1613 } 1614 1615 color = PETSC_DRAW_RED; 1616 for (i=0,row=0; i<mbs; i++,row+=bs) { 1617 for (j=a->i[i]; j<a->i[i+1]; j++) { 1618 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1619 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1620 aa = a->a + j*bs2; 1621 for (k=0; k<bs; k++) { 1622 for (l=0; l<bs; l++) { 1623 if (PetscRealPart(*aa++) <= 0.) continue; 1624 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1625 } 1626 } 1627 } 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1634 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1635 { 1636 PetscErrorCode ierr; 1637 PetscReal xl,yl,xr,yr,w,h; 1638 PetscDraw draw; 1639 PetscTruth isnull; 1640 1641 PetscFunctionBegin; 1642 1643 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1644 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1645 1646 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1647 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1648 xr += w; yr += h; xl = -w; yl = -h; 1649 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1650 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1651 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatView_SeqBAIJ" 1657 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1658 { 1659 PetscErrorCode ierr; 1660 PetscTruth iascii,isbinary,isdraw; 1661 1662 PetscFunctionBegin; 1663 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1664 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1665 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1666 if (iascii){ 1667 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1668 } else if (isbinary) { 1669 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1670 } else if (isdraw) { 1671 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1672 } else { 1673 Mat B; 1674 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1675 ierr = MatView(B,viewer);CHKERRQ(ierr); 1676 ierr = MatDestroy(B);CHKERRQ(ierr); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1684 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1685 { 1686 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1687 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1688 PetscInt *ai = a->i,*ailen = a->ilen; 1689 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1690 MatScalar *ap,*aa = a->a; 1691 1692 PetscFunctionBegin; 1693 for (k=0; k<m; k++) { /* loop over rows */ 1694 row = im[k]; brow = row/bs; 1695 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1696 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1697 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1698 nrow = ailen[brow]; 1699 for (l=0; l<n; l++) { /* loop over columns */ 1700 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1701 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1702 col = in[l] ; 1703 bcol = col/bs; 1704 cidx = col%bs; 1705 ridx = row%bs; 1706 high = nrow; 1707 low = 0; /* assume unsorted */ 1708 while (high-low > 5) { 1709 t = (low+high)/2; 1710 if (rp[t] > bcol) high = t; 1711 else low = t; 1712 } 1713 for (i=low; i<high; i++) { 1714 if (rp[i] > bcol) break; 1715 if (rp[i] == bcol) { 1716 *v++ = ap[bs2*i+bs*cidx+ridx]; 1717 goto finished; 1718 } 1719 } 1720 *v++ = 0.0; 1721 finished:; 1722 } 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #define CHUNKSIZE 10 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1730 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1731 { 1732 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1733 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1734 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1735 PetscErrorCode ierr; 1736 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1737 PetscTruth roworiented=a->roworiented; 1738 const MatScalar *value = v; 1739 MatScalar *ap,*aa = a->a,*bap; 1740 1741 PetscFunctionBegin; 1742 if (roworiented) { 1743 stepval = (n-1)*bs; 1744 } else { 1745 stepval = (m-1)*bs; 1746 } 1747 for (k=0; k<m; k++) { /* loop over added rows */ 1748 row = im[k]; 1749 if (row < 0) continue; 1750 #if defined(PETSC_USE_DEBUG) 1751 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1752 #endif 1753 rp = aj + ai[row]; 1754 ap = aa + bs2*ai[row]; 1755 rmax = imax[row]; 1756 nrow = ailen[row]; 1757 low = 0; 1758 high = nrow; 1759 for (l=0; l<n; l++) { /* loop over added columns */ 1760 if (in[l] < 0) continue; 1761 #if defined(PETSC_USE_DEBUG) 1762 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1763 #endif 1764 col = in[l]; 1765 if (roworiented) { 1766 value = v + k*(stepval+bs)*bs + l*bs; 1767 } else { 1768 value = v + l*(stepval+bs)*bs + k*bs; 1769 } 1770 if (col <= lastcol) low = 0; else high = nrow; 1771 lastcol = col; 1772 while (high-low > 7) { 1773 t = (low+high)/2; 1774 if (rp[t] > col) high = t; 1775 else low = t; 1776 } 1777 for (i=low; i<high; i++) { 1778 if (rp[i] > col) break; 1779 if (rp[i] == col) { 1780 bap = ap + bs2*i; 1781 if (roworiented) { 1782 if (is == ADD_VALUES) { 1783 for (ii=0; ii<bs; ii++,value+=stepval) { 1784 for (jj=ii; jj<bs2; jj+=bs) { 1785 bap[jj] += *value++; 1786 } 1787 } 1788 } else { 1789 for (ii=0; ii<bs; ii++,value+=stepval) { 1790 for (jj=ii; jj<bs2; jj+=bs) { 1791 bap[jj] = *value++; 1792 } 1793 } 1794 } 1795 } else { 1796 if (is == ADD_VALUES) { 1797 for (ii=0; ii<bs; ii++,value+=stepval) { 1798 for (jj=0; jj<bs; jj++) { 1799 *bap++ += *value++; 1800 } 1801 } 1802 } else { 1803 for (ii=0; ii<bs; ii++,value+=stepval) { 1804 for (jj=0; jj<bs; jj++) { 1805 *bap++ = *value++; 1806 } 1807 } 1808 } 1809 } 1810 goto noinsert2; 1811 } 1812 } 1813 if (nonew == 1) goto noinsert2; 1814 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1815 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1816 N = nrow++ - 1; high++; 1817 /* shift up all the later entries in this row */ 1818 for (ii=N; ii>=i; ii--) { 1819 rp[ii+1] = rp[ii]; 1820 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1821 } 1822 if (N >= i) { 1823 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1824 } 1825 rp[i] = col; 1826 bap = ap + bs2*i; 1827 if (roworiented) { 1828 for (ii=0; ii<bs; ii++,value+=stepval) { 1829 for (jj=ii; jj<bs2; jj+=bs) { 1830 bap[jj] = *value++; 1831 } 1832 } 1833 } else { 1834 for (ii=0; ii<bs; ii++,value+=stepval) { 1835 for (jj=0; jj<bs; jj++) { 1836 *bap++ = *value++; 1837 } 1838 } 1839 } 1840 noinsert2:; 1841 low = i; 1842 } 1843 ailen[row] = nrow; 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1850 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1851 { 1852 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1853 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1854 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1855 PetscErrorCode ierr; 1856 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1857 MatScalar *aa = a->a,*ap; 1858 PetscReal ratio=0.6; 1859 1860 PetscFunctionBegin; 1861 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1862 1863 if (m) rmax = ailen[0]; 1864 for (i=1; i<mbs; i++) { 1865 /* move each row back by the amount of empty slots (fshift) before it*/ 1866 fshift += imax[i-1] - ailen[i-1]; 1867 rmax = PetscMax(rmax,ailen[i]); 1868 if (fshift) { 1869 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1870 N = ailen[i]; 1871 for (j=0; j<N; j++) { 1872 ip[j-fshift] = ip[j]; 1873 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1874 } 1875 } 1876 ai[i] = ai[i-1] + ailen[i-1]; 1877 } 1878 if (mbs) { 1879 fshift += imax[mbs-1] - ailen[mbs-1]; 1880 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1881 } 1882 /* reset ilen and imax for each row */ 1883 for (i=0; i<mbs; i++) { 1884 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1885 } 1886 a->nz = ai[mbs]; 1887 1888 /* diagonals may have moved, so kill the diagonal pointers */ 1889 a->idiagvalid = PETSC_FALSE; 1890 if (fshift && a->diag) { 1891 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1892 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1893 a->diag = 0; 1894 } 1895 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1896 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1897 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1898 a->reallocs = 0; 1899 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1900 1901 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1902 if (a->compressedrow.use){ 1903 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1904 } 1905 1906 A->same_nonzero = PETSC_TRUE; 1907 PetscFunctionReturn(0); 1908 } 1909 1910 /* 1911 This function returns an array of flags which indicate the locations of contiguous 1912 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1913 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1914 Assume: sizes should be long enough to hold all the values. 1915 */ 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1918 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1919 { 1920 PetscInt i,j,k,row; 1921 PetscTruth flg; 1922 1923 PetscFunctionBegin; 1924 for (i=0,j=0; i<n; j++) { 1925 row = idx[i]; 1926 if (row%bs!=0) { /* Not the begining of a block */ 1927 sizes[j] = 1; 1928 i++; 1929 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1930 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1931 i++; 1932 } else { /* Begining of the block, so check if the complete block exists */ 1933 flg = PETSC_TRUE; 1934 for (k=1; k<bs; k++) { 1935 if (row+k != idx[i+k]) { /* break in the block */ 1936 flg = PETSC_FALSE; 1937 break; 1938 } 1939 } 1940 if (flg) { /* No break in the bs */ 1941 sizes[j] = bs; 1942 i+= bs; 1943 } else { 1944 sizes[j] = 1; 1945 i++; 1946 } 1947 } 1948 } 1949 *bs_max = j; 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1955 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1956 { 1957 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1958 PetscErrorCode ierr; 1959 PetscInt i,j,k,count,*rows; 1960 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1961 PetscScalar zero = 0.0; 1962 MatScalar *aa; 1963 1964 PetscFunctionBegin; 1965 /* Make a copy of the IS and sort it */ 1966 /* allocate memory for rows,sizes */ 1967 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1968 sizes = rows + is_n; 1969 1970 /* copy IS values to rows, and sort them */ 1971 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1972 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1973 if (baij->keepzeroedrows) { 1974 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1975 bs_max = is_n; 1976 A->same_nonzero = PETSC_TRUE; 1977 } else { 1978 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1979 A->same_nonzero = PETSC_FALSE; 1980 } 1981 1982 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1983 row = rows[j]; 1984 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1985 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1986 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1987 if (sizes[i] == bs && !baij->keepzeroedrows) { 1988 if (diag != 0.0) { 1989 if (baij->ilen[row/bs] > 0) { 1990 baij->ilen[row/bs] = 1; 1991 baij->j[baij->i[row/bs]] = row/bs; 1992 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1993 } 1994 /* Now insert all the diagonal values for this bs */ 1995 for (k=0; k<bs; k++) { 1996 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1997 } 1998 } else { /* (diag == 0.0) */ 1999 baij->ilen[row/bs] = 0; 2000 } /* end (diag == 0.0) */ 2001 } else { /* (sizes[i] != bs) */ 2002 #if defined (PETSC_USE_DEBUG) 2003 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2004 #endif 2005 for (k=0; k<count; k++) { 2006 aa[0] = zero; 2007 aa += bs; 2008 } 2009 if (diag != 0.0) { 2010 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2011 } 2012 } 2013 } 2014 2015 ierr = PetscFree(rows);CHKERRQ(ierr); 2016 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2022 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2023 { 2024 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2025 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2026 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2027 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 2028 PetscErrorCode ierr; 2029 PetscInt ridx,cidx,bs2=a->bs2; 2030 PetscTruth roworiented=a->roworiented; 2031 MatScalar *ap,value,*aa=a->a,*bap; 2032 2033 PetscFunctionBegin; 2034 for (k=0; k<m; k++) { /* loop over added rows */ 2035 row = im[k]; 2036 brow = row/bs; 2037 if (row < 0) continue; 2038 #if defined(PETSC_USE_DEBUG) 2039 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 2040 #endif 2041 rp = aj + ai[brow]; 2042 ap = aa + bs2*ai[brow]; 2043 rmax = imax[brow]; 2044 nrow = ailen[brow]; 2045 low = 0; 2046 high = nrow; 2047 for (l=0; l<n; l++) { /* loop over added columns */ 2048 if (in[l] < 0) continue; 2049 #if defined(PETSC_USE_DEBUG) 2050 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 2051 #endif 2052 col = in[l]; bcol = col/bs; 2053 ridx = row % bs; cidx = col % bs; 2054 if (roworiented) { 2055 value = v[l + k*n]; 2056 } else { 2057 value = v[k + l*m]; 2058 } 2059 if (col <= lastcol) low = 0; else high = nrow; 2060 lastcol = col; 2061 while (high-low > 7) { 2062 t = (low+high)/2; 2063 if (rp[t] > bcol) high = t; 2064 else low = t; 2065 } 2066 for (i=low; i<high; i++) { 2067 if (rp[i] > bcol) break; 2068 if (rp[i] == bcol) { 2069 bap = ap + bs2*i + bs*cidx + ridx; 2070 if (is == ADD_VALUES) *bap += value; 2071 else *bap = value; 2072 goto noinsert1; 2073 } 2074 } 2075 if (nonew == 1) goto noinsert1; 2076 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2077 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2078 N = nrow++ - 1; high++; 2079 /* shift up all the later entries in this row */ 2080 for (ii=N; ii>=i; ii--) { 2081 rp[ii+1] = rp[ii]; 2082 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2083 } 2084 if (N>=i) { 2085 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2086 } 2087 rp[i] = bcol; 2088 ap[bs2*i + bs*cidx + ridx] = value; 2089 a->nz++; 2090 noinsert1:; 2091 low = i; 2092 } 2093 ailen[brow] = nrow; 2094 } 2095 A->same_nonzero = PETSC_FALSE; 2096 PetscFunctionReturn(0); 2097 } 2098 2099 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2102 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 2103 { 2104 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2105 Mat outA; 2106 PetscErrorCode ierr; 2107 PetscTruth row_identity,col_identity; 2108 2109 PetscFunctionBegin; 2110 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2111 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2112 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2113 if (!row_identity || !col_identity) { 2114 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2115 } 2116 2117 outA = inA; 2118 inA->factor = FACTOR_LU; 2119 2120 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2121 2122 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2123 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2124 a->row = row; 2125 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2126 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2127 a->col = col; 2128 2129 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2130 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2131 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2132 2133 /* 2134 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 2135 for ILU(0) factorization with natural ordering 2136 */ 2137 if (inA->rmap.bs < 8) { 2138 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 2139 } else { 2140 if (!a->solve_work) { 2141 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2142 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2143 } 2144 } 2145 2146 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 2147 2148 PetscFunctionReturn(0); 2149 } 2150 2151 EXTERN_C_BEGIN 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2154 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2155 { 2156 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2157 PetscInt i,nz,nbs; 2158 2159 PetscFunctionBegin; 2160 nz = baij->maxnz/baij->bs2; 2161 nbs = baij->nbs; 2162 for (i=0; i<nz; i++) { 2163 baij->j[i] = indices[i]; 2164 } 2165 baij->nz = nz; 2166 for (i=0; i<nbs; i++) { 2167 baij->ilen[i] = baij->imax[i]; 2168 } 2169 2170 PetscFunctionReturn(0); 2171 } 2172 EXTERN_C_END 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2176 /*@ 2177 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2178 in the matrix. 2179 2180 Input Parameters: 2181 + mat - the SeqBAIJ matrix 2182 - indices - the column indices 2183 2184 Level: advanced 2185 2186 Notes: 2187 This can be called if you have precomputed the nonzero structure of the 2188 matrix and want to provide it to the matrix object to improve the performance 2189 of the MatSetValues() operation. 2190 2191 You MUST have set the correct numbers of nonzeros per row in the call to 2192 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2193 2194 MUST be called before any calls to MatSetValues(); 2195 2196 @*/ 2197 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2198 { 2199 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2200 2201 PetscFunctionBegin; 2202 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2203 PetscValidPointer(indices,2); 2204 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2205 if (f) { 2206 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2207 } else { 2208 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2209 } 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2215 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2216 { 2217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2218 PetscErrorCode ierr; 2219 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2220 PetscReal atmp; 2221 PetscScalar *x,zero = 0.0; 2222 MatScalar *aa; 2223 PetscInt ncols,brow,krow,kcol; 2224 2225 PetscFunctionBegin; 2226 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2227 bs = A->rmap.bs; 2228 aa = a->a; 2229 ai = a->i; 2230 aj = a->j; 2231 mbs = a->mbs; 2232 2233 ierr = VecSet(v,zero);CHKERRQ(ierr); 2234 if (idx) { 2235 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 2236 } 2237 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2238 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2239 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2240 for (i=0; i<mbs; i++) { 2241 ncols = ai[1] - ai[0]; ai++; 2242 brow = bs*i; 2243 for (j=0; j<ncols; j++){ 2244 for (kcol=0; kcol<bs; kcol++){ 2245 for (krow=0; krow<bs; krow++){ 2246 atmp = PetscAbsScalar(*aa);aa++; 2247 row = brow + krow; /* row index */ 2248 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2249 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2250 } 2251 } 2252 aj++; 2253 } 2254 } 2255 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatCopy_SeqBAIJ" 2261 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2262 { 2263 PetscErrorCode ierr; 2264 2265 PetscFunctionBegin; 2266 /* If the two matrices have the same copy implementation, use fast copy. */ 2267 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2268 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2269 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2270 2271 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 2272 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2273 } 2274 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 2275 } else { 2276 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2277 } 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2283 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2284 { 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2294 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2295 { 2296 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2297 PetscFunctionBegin; 2298 *array = a->a; 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2304 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2305 { 2306 PetscFunctionBegin; 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #include "petscblaslapack.h" 2311 #undef __FUNCT__ 2312 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2313 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2314 { 2315 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2316 PetscErrorCode ierr; 2317 PetscInt i,bs=Y->rmap.bs,j,bs2; 2318 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2319 2320 PetscFunctionBegin; 2321 if (str == SAME_NONZERO_PATTERN) { 2322 PetscScalar alpha = a; 2323 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2324 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2325 if (y->xtoy && y->XtoY != X) { 2326 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2327 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2328 } 2329 if (!y->xtoy) { /* get xtoy */ 2330 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2331 y->XtoY = X; 2332 } 2333 bs2 = bs*bs; 2334 for (i=0; i<x->nz; i++) { 2335 j = 0; 2336 while (j < bs2){ 2337 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2338 j++; 2339 } 2340 } 2341 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2342 } else { 2343 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2344 } 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2350 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2351 { 2352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2353 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2354 PetscScalar *aa = a->a; 2355 2356 PetscFunctionBegin; 2357 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 #undef __FUNCT__ 2362 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2363 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2364 { 2365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2366 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2367 PetscScalar *aa = a->a; 2368 2369 PetscFunctionBegin; 2370 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 2375 /* -------------------------------------------------------------------*/ 2376 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2377 MatGetRow_SeqBAIJ, 2378 MatRestoreRow_SeqBAIJ, 2379 MatMult_SeqBAIJ_N, 2380 /* 4*/ MatMultAdd_SeqBAIJ_N, 2381 MatMultTranspose_SeqBAIJ, 2382 MatMultTransposeAdd_SeqBAIJ, 2383 MatSolve_SeqBAIJ_N, 2384 0, 2385 0, 2386 /*10*/ 0, 2387 MatLUFactor_SeqBAIJ, 2388 0, 2389 0, 2390 MatTranspose_SeqBAIJ, 2391 /*15*/ MatGetInfo_SeqBAIJ, 2392 MatEqual_SeqBAIJ, 2393 MatGetDiagonal_SeqBAIJ, 2394 MatDiagonalScale_SeqBAIJ, 2395 MatNorm_SeqBAIJ, 2396 /*20*/ 0, 2397 MatAssemblyEnd_SeqBAIJ, 2398 0, 2399 MatSetOption_SeqBAIJ, 2400 MatZeroEntries_SeqBAIJ, 2401 /*25*/ MatZeroRows_SeqBAIJ, 2402 MatLUFactorSymbolic_SeqBAIJ, 2403 MatLUFactorNumeric_SeqBAIJ_N, 2404 MatCholeskyFactorSymbolic_SeqBAIJ, 2405 MatCholeskyFactorNumeric_SeqBAIJ_N, 2406 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2407 MatILUFactorSymbolic_SeqBAIJ, 2408 MatICCFactorSymbolic_SeqBAIJ, 2409 MatGetArray_SeqBAIJ, 2410 MatRestoreArray_SeqBAIJ, 2411 /*35*/ MatDuplicate_SeqBAIJ, 2412 0, 2413 0, 2414 MatILUFactor_SeqBAIJ, 2415 0, 2416 /*40*/ MatAXPY_SeqBAIJ, 2417 MatGetSubMatrices_SeqBAIJ, 2418 MatIncreaseOverlap_SeqBAIJ, 2419 MatGetValues_SeqBAIJ, 2420 MatCopy_SeqBAIJ, 2421 /*45*/ 0, 2422 MatScale_SeqBAIJ, 2423 0, 2424 0, 2425 0, 2426 /*50*/ 0, 2427 MatGetRowIJ_SeqBAIJ, 2428 MatRestoreRowIJ_SeqBAIJ, 2429 0, 2430 0, 2431 /*55*/ 0, 2432 0, 2433 0, 2434 0, 2435 MatSetValuesBlocked_SeqBAIJ, 2436 /*60*/ MatGetSubMatrix_SeqBAIJ, 2437 MatDestroy_SeqBAIJ, 2438 MatView_SeqBAIJ, 2439 0, 2440 0, 2441 /*65*/ 0, 2442 0, 2443 0, 2444 0, 2445 0, 2446 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2447 MatConvert_Basic, 2448 0, 2449 0, 2450 0, 2451 /*75*/ 0, 2452 0, 2453 0, 2454 0, 2455 0, 2456 /*80*/ 0, 2457 0, 2458 0, 2459 0, 2460 MatLoad_SeqBAIJ, 2461 /*85*/ 0, 2462 0, 2463 0, 2464 0, 2465 0, 2466 /*90*/ 0, 2467 0, 2468 0, 2469 0, 2470 0, 2471 /*95*/ 0, 2472 0, 2473 0, 2474 0, 2475 0, 2476 /*100*/0, 2477 0, 2478 0, 2479 0, 2480 0, 2481 /*105*/0, 2482 MatRealPart_SeqBAIJ, 2483 MatImaginaryPart_SeqBAIJ, 2484 0, 2485 0, 2486 /*110*/0, 2487 0, 2488 0, 2489 0, 2490 MatMissingDiagonal_SeqBAIJ 2491 /*115*/ 2492 }; 2493 2494 EXTERN_C_BEGIN 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2497 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2498 { 2499 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2500 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2501 PetscErrorCode ierr; 2502 2503 PetscFunctionBegin; 2504 if (aij->nonew != 1) { 2505 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2506 } 2507 2508 /* allocate space for values if not already there */ 2509 if (!aij->saved_values) { 2510 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2511 } 2512 2513 /* copy values over */ 2514 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 EXTERN_C_END 2518 2519 EXTERN_C_BEGIN 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2522 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2523 { 2524 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2525 PetscErrorCode ierr; 2526 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2527 2528 PetscFunctionBegin; 2529 if (aij->nonew != 1) { 2530 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2531 } 2532 if (!aij->saved_values) { 2533 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2534 } 2535 2536 /* copy values over */ 2537 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 EXTERN_C_END 2541 2542 EXTERN_C_BEGIN 2543 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2544 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2545 EXTERN_C_END 2546 2547 EXTERN_C_BEGIN 2548 #undef __FUNCT__ 2549 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2550 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2551 { 2552 Mat_SeqBAIJ *b; 2553 PetscErrorCode ierr; 2554 PetscInt i,mbs,nbs,bs2,newbs = bs; 2555 PetscTruth flg,skipallocation = PETSC_FALSE; 2556 2557 PetscFunctionBegin; 2558 2559 if (nz == MAT_SKIP_ALLOCATION) { 2560 skipallocation = PETSC_TRUE; 2561 nz = 0; 2562 } 2563 2564 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2565 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2566 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2567 2568 if (nnz && newbs != bs) { 2569 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2570 } 2571 bs = newbs; 2572 2573 B->rmap.bs = B->cmap.bs = bs; 2574 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2575 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2576 2577 B->preallocated = PETSC_TRUE; 2578 2579 mbs = B->rmap.n/bs; 2580 nbs = B->cmap.n/bs; 2581 bs2 = bs*bs; 2582 2583 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2584 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2585 } 2586 2587 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2588 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2589 if (nnz) { 2590 for (i=0; i<mbs; i++) { 2591 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2592 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2593 } 2594 } 2595 2596 b = (Mat_SeqBAIJ*)B->data; 2597 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2598 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2599 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2600 2601 B->ops->solve = MatSolve_SeqBAIJ_Update; 2602 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2603 if (!flg) { 2604 switch (bs) { 2605 case 1: 2606 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2607 B->ops->mult = MatMult_SeqBAIJ_1; 2608 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2609 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2610 break; 2611 case 2: 2612 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2613 B->ops->mult = MatMult_SeqBAIJ_2; 2614 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2615 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2616 break; 2617 case 3: 2618 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2619 B->ops->mult = MatMult_SeqBAIJ_3; 2620 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2621 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2622 break; 2623 case 4: 2624 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2625 B->ops->mult = MatMult_SeqBAIJ_4; 2626 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2627 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2628 break; 2629 case 5: 2630 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2631 B->ops->mult = MatMult_SeqBAIJ_5; 2632 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2633 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2634 break; 2635 case 6: 2636 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2637 B->ops->mult = MatMult_SeqBAIJ_6; 2638 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2639 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2640 break; 2641 case 7: 2642 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2643 B->ops->mult = MatMult_SeqBAIJ_7; 2644 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2645 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2646 break; 2647 default: 2648 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2649 B->ops->mult = MatMult_SeqBAIJ_N; 2650 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2651 break; 2652 } 2653 } 2654 B->rmap.bs = bs; 2655 b->mbs = mbs; 2656 b->nbs = nbs; 2657 if (!skipallocation) { 2658 if (!b->imax) { 2659 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2660 } 2661 /* b->ilen will count nonzeros in each block row so far. */ 2662 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2663 if (!nnz) { 2664 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2665 else if (nz <= 0) nz = 1; 2666 for (i=0; i<mbs; i++) b->imax[i] = nz; 2667 nz = nz*mbs; 2668 } else { 2669 nz = 0; 2670 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2671 } 2672 2673 /* allocate the matrix space */ 2674 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2675 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2676 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2677 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2678 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2679 b->singlemalloc = PETSC_TRUE; 2680 b->i[0] = 0; 2681 for (i=1; i<mbs+1; i++) { 2682 b->i[i] = b->i[i-1] + b->imax[i-1]; 2683 } 2684 b->free_a = PETSC_TRUE; 2685 b->free_ij = PETSC_TRUE; 2686 } else { 2687 b->free_a = PETSC_FALSE; 2688 b->free_ij = PETSC_FALSE; 2689 } 2690 2691 B->rmap.bs = bs; 2692 b->bs2 = bs2; 2693 b->mbs = mbs; 2694 b->nz = 0; 2695 b->maxnz = nz*bs2; 2696 B->info.nz_unneeded = (PetscReal)b->maxnz; 2697 PetscFunctionReturn(0); 2698 } 2699 EXTERN_C_END 2700 2701 /*MC 2702 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2703 block sparse compressed row format. 2704 2705 Options Database Keys: 2706 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2707 2708 Level: beginner 2709 2710 .seealso: MatCreateSeqBAIJ() 2711 M*/ 2712 2713 EXTERN_C_BEGIN 2714 #undef __FUNCT__ 2715 #define __FUNCT__ "MatCreate_SeqBAIJ" 2716 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2717 { 2718 PetscErrorCode ierr; 2719 PetscMPIInt size; 2720 Mat_SeqBAIJ *b; 2721 2722 PetscFunctionBegin; 2723 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2724 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2725 2726 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2727 B->data = (void*)b; 2728 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2729 B->factor = 0; 2730 B->mapping = 0; 2731 b->row = 0; 2732 b->col = 0; 2733 b->icol = 0; 2734 b->reallocs = 0; 2735 b->saved_values = 0; 2736 2737 b->roworiented = PETSC_TRUE; 2738 b->nonew = 0; 2739 b->diag = 0; 2740 b->solve_work = 0; 2741 b->mult_work = 0; 2742 B->spptr = 0; 2743 B->info.nz_unneeded = (PetscReal)b->maxnz; 2744 b->keepzeroedrows = PETSC_FALSE; 2745 b->xtoy = 0; 2746 b->XtoY = 0; 2747 b->compressedrow.use = PETSC_FALSE; 2748 b->compressedrow.nrows = 0; 2749 b->compressedrow.i = PETSC_NULL; 2750 b->compressedrow.rindex = PETSC_NULL; 2751 b->compressedrow.checked = PETSC_FALSE; 2752 B->same_nonzero = PETSC_FALSE; 2753 2754 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2755 "MatInvertBlockDiagonal_SeqBAIJ", 2756 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2757 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2758 "MatStoreValues_SeqBAIJ", 2759 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2760 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2761 "MatRetrieveValues_SeqBAIJ", 2762 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2763 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2764 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2765 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2766 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2767 "MatConvert_SeqBAIJ_SeqAIJ", 2768 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2769 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2770 "MatConvert_SeqBAIJ_SeqSBAIJ", 2771 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2773 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2774 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2775 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2776 PetscFunctionReturn(0); 2777 } 2778 EXTERN_C_END 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2782 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2783 { 2784 Mat C; 2785 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2786 PetscErrorCode ierr; 2787 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2788 2789 PetscFunctionBegin; 2790 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2791 2792 *B = 0; 2793 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2794 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2795 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2796 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2797 c = (Mat_SeqBAIJ*)C->data; 2798 2799 C->rmap.N = A->rmap.N; 2800 C->cmap.N = A->cmap.N; 2801 C->rmap.bs = A->rmap.bs; 2802 c->bs2 = a->bs2; 2803 c->mbs = a->mbs; 2804 c->nbs = a->nbs; 2805 2806 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2807 for (i=0; i<mbs; i++) { 2808 c->imax[i] = a->imax[i]; 2809 c->ilen[i] = a->ilen[i]; 2810 } 2811 2812 /* allocate the matrix space */ 2813 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2814 c->singlemalloc = PETSC_TRUE; 2815 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2816 if (mbs > 0) { 2817 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2818 if (cpvalues == MAT_COPY_VALUES) { 2819 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2820 } else { 2821 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2822 } 2823 } 2824 c->roworiented = a->roworiented; 2825 c->nonew = a->nonew; 2826 2827 if (a->diag) { 2828 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2829 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2830 for (i=0; i<mbs; i++) { 2831 c->diag[i] = a->diag[i]; 2832 } 2833 } else c->diag = 0; 2834 c->nz = a->nz; 2835 c->maxnz = a->maxnz; 2836 c->solve_work = 0; 2837 c->mult_work = 0; 2838 c->free_a = PETSC_TRUE; 2839 c->free_ij = PETSC_TRUE; 2840 C->preallocated = PETSC_TRUE; 2841 C->assembled = PETSC_TRUE; 2842 2843 c->compressedrow.use = a->compressedrow.use; 2844 c->compressedrow.nrows = a->compressedrow.nrows; 2845 c->compressedrow.checked = a->compressedrow.checked; 2846 if ( a->compressedrow.checked && a->compressedrow.use){ 2847 i = a->compressedrow.nrows; 2848 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2849 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2850 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2851 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2852 } else { 2853 c->compressedrow.use = PETSC_FALSE; 2854 c->compressedrow.i = PETSC_NULL; 2855 c->compressedrow.rindex = PETSC_NULL; 2856 } 2857 C->same_nonzero = A->same_nonzero; 2858 *B = C; 2859 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2860 PetscFunctionReturn(0); 2861 } 2862 2863 #undef __FUNCT__ 2864 #define __FUNCT__ "MatLoad_SeqBAIJ" 2865 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2866 { 2867 Mat_SeqBAIJ *a; 2868 Mat B; 2869 PetscErrorCode ierr; 2870 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2871 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2872 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2873 PetscInt *masked,nmask,tmp,bs2,ishift; 2874 PetscMPIInt size; 2875 int fd; 2876 PetscScalar *aa; 2877 MPI_Comm comm = ((PetscObject)viewer)->comm; 2878 2879 PetscFunctionBegin; 2880 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2881 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2882 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2883 bs2 = bs*bs; 2884 2885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2886 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2887 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2888 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2889 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2890 M = header[1]; N = header[2]; nz = header[3]; 2891 2892 if (header[3] < 0) { 2893 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2894 } 2895 2896 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2897 2898 /* 2899 This code adds extra rows to make sure the number of rows is 2900 divisible by the blocksize 2901 */ 2902 mbs = M/bs; 2903 extra_rows = bs - M + bs*(mbs); 2904 if (extra_rows == bs) extra_rows = 0; 2905 else mbs++; 2906 if (extra_rows) { 2907 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2908 } 2909 2910 /* read in row lengths */ 2911 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2912 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2913 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2914 2915 /* read in column indices */ 2916 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2917 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2918 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2919 2920 /* loop over row lengths determining block row lengths */ 2921 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2922 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2923 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2924 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2925 masked = mask + mbs; 2926 rowcount = 0; nzcount = 0; 2927 for (i=0; i<mbs; i++) { 2928 nmask = 0; 2929 for (j=0; j<bs; j++) { 2930 kmax = rowlengths[rowcount]; 2931 for (k=0; k<kmax; k++) { 2932 tmp = jj[nzcount++]/bs; 2933 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2934 } 2935 rowcount++; 2936 } 2937 browlengths[i] += nmask; 2938 /* zero out the mask elements we set */ 2939 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2940 } 2941 2942 /* create our matrix */ 2943 ierr = MatCreate(comm,&B); 2944 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2945 ierr = MatSetType(B,type);CHKERRQ(ierr); 2946 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2947 a = (Mat_SeqBAIJ*)B->data; 2948 2949 /* set matrix "i" values */ 2950 a->i[0] = 0; 2951 for (i=1; i<= mbs; i++) { 2952 a->i[i] = a->i[i-1] + browlengths[i-1]; 2953 a->ilen[i-1] = browlengths[i-1]; 2954 } 2955 a->nz = 0; 2956 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2957 2958 /* read in nonzero values */ 2959 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2960 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2961 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2962 2963 /* set "a" and "j" values into matrix */ 2964 nzcount = 0; jcount = 0; 2965 for (i=0; i<mbs; i++) { 2966 nzcountb = nzcount; 2967 nmask = 0; 2968 for (j=0; j<bs; j++) { 2969 kmax = rowlengths[i*bs+j]; 2970 for (k=0; k<kmax; k++) { 2971 tmp = jj[nzcount++]/bs; 2972 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2973 } 2974 } 2975 /* sort the masked values */ 2976 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2977 2978 /* set "j" values into matrix */ 2979 maskcount = 1; 2980 for (j=0; j<nmask; j++) { 2981 a->j[jcount++] = masked[j]; 2982 mask[masked[j]] = maskcount++; 2983 } 2984 /* set "a" values into matrix */ 2985 ishift = bs2*a->i[i]; 2986 for (j=0; j<bs; j++) { 2987 kmax = rowlengths[i*bs+j]; 2988 for (k=0; k<kmax; k++) { 2989 tmp = jj[nzcountb]/bs ; 2990 block = mask[tmp] - 1; 2991 point = jj[nzcountb] - bs*tmp; 2992 idx = ishift + bs2*block + j + bs*point; 2993 a->a[idx] = (MatScalar)aa[nzcountb++]; 2994 } 2995 } 2996 /* zero out the mask elements we set */ 2997 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2998 } 2999 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3000 3001 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3002 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3003 ierr = PetscFree(aa);CHKERRQ(ierr); 3004 ierr = PetscFree(jj);CHKERRQ(ierr); 3005 ierr = PetscFree(mask);CHKERRQ(ierr); 3006 3007 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3008 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3009 ierr = MatView_Private(B);CHKERRQ(ierr); 3010 3011 *A = B; 3012 PetscFunctionReturn(0); 3013 } 3014 3015 #undef __FUNCT__ 3016 #define __FUNCT__ "MatCreateSeqBAIJ" 3017 /*@C 3018 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3019 compressed row) format. For good matrix assembly performance the 3020 user should preallocate the matrix storage by setting the parameter nz 3021 (or the array nnz). By setting these parameters accurately, performance 3022 during matrix assembly can be increased by more than a factor of 50. 3023 3024 Collective on MPI_Comm 3025 3026 Input Parameters: 3027 + comm - MPI communicator, set to PETSC_COMM_SELF 3028 . bs - size of block 3029 . m - number of rows 3030 . n - number of columns 3031 . nz - number of nonzero blocks per block row (same for all rows) 3032 - nnz - array containing the number of nonzero blocks in the various block rows 3033 (possibly different for each block row) or PETSC_NULL 3034 3035 Output Parameter: 3036 . A - the matrix 3037 3038 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3039 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3040 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3041 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3042 3043 Options Database Keys: 3044 . -mat_no_unroll - uses code that does not unroll the loops in the 3045 block calculations (much slower) 3046 . -mat_block_size - size of the blocks to use 3047 3048 Level: intermediate 3049 3050 Notes: 3051 The number of rows and columns must be divisible by blocksize. 3052 3053 If the nnz parameter is given then the nz parameter is ignored 3054 3055 A nonzero block is any block that as 1 or more nonzeros in it 3056 3057 The block AIJ format is fully compatible with standard Fortran 77 3058 storage. That is, the stored row and column indices can begin at 3059 either one (as in Fortran) or zero. See the users' manual for details. 3060 3061 Specify the preallocated storage with either nz or nnz (not both). 3062 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3063 allocation. For additional details, see the users manual chapter on 3064 matrices. 3065 3066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3067 @*/ 3068 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3069 { 3070 PetscErrorCode ierr; 3071 3072 PetscFunctionBegin; 3073 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3074 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3075 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3076 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3077 PetscFunctionReturn(0); 3078 } 3079 3080 #undef __FUNCT__ 3081 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3082 /*@C 3083 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3084 per row in the matrix. For good matrix assembly performance the 3085 user should preallocate the matrix storage by setting the parameter nz 3086 (or the array nnz). By setting these parameters accurately, performance 3087 during matrix assembly can be increased by more than a factor of 50. 3088 3089 Collective on MPI_Comm 3090 3091 Input Parameters: 3092 + A - the matrix 3093 . bs - size of block 3094 . nz - number of block nonzeros per block row (same for all rows) 3095 - nnz - array containing the number of block nonzeros in the various block rows 3096 (possibly different for each block row) or PETSC_NULL 3097 3098 Options Database Keys: 3099 . -mat_no_unroll - uses code that does not unroll the loops in the 3100 block calculations (much slower) 3101 . -mat_block_size - size of the blocks to use 3102 3103 Level: intermediate 3104 3105 Notes: 3106 If the nnz parameter is given then the nz parameter is ignored 3107 3108 You can call MatGetInfo() to get information on how effective the preallocation was; 3109 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3110 You can also run with the option -info and look for messages with the string 3111 malloc in them to see if additional memory allocation was needed. 3112 3113 The block AIJ format is fully compatible with standard Fortran 77 3114 storage. That is, the stored row and column indices can begin at 3115 either one (as in Fortran) or zero. See the users' manual for details. 3116 3117 Specify the preallocated storage with either nz or nnz (not both). 3118 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3119 allocation. For additional details, see the users manual chapter on 3120 matrices. 3121 3122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3123 @*/ 3124 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3125 { 3126 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3127 3128 PetscFunctionBegin; 3129 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3130 if (f) { 3131 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3132 } 3133 PetscFunctionReturn(0); 3134 } 3135 3136 #undef __FUNCT__ 3137 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3138 /*@ 3139 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3140 (upper triangular entries in CSR format) provided by the user. 3141 3142 Collective on MPI_Comm 3143 3144 Input Parameters: 3145 + comm - must be an MPI communicator of size 1 3146 . bs - size of block 3147 . m - number of rows 3148 . n - number of columns 3149 . i - row indices 3150 . j - column indices 3151 - a - matrix values 3152 3153 Output Parameter: 3154 . mat - the matrix 3155 3156 Level: intermediate 3157 3158 Notes: 3159 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3160 once the matrix is destroyed 3161 3162 You cannot set new nonzero locations into this matrix, that will generate an error. 3163 3164 The i and j indices are 0 based 3165 3166 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3167 3168 @*/ 3169 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3170 { 3171 PetscErrorCode ierr; 3172 PetscInt ii; 3173 Mat_SeqBAIJ *baij; 3174 3175 PetscFunctionBegin; 3176 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3177 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3178 3179 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3180 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3181 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3182 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3183 baij = (Mat_SeqBAIJ*)(*mat)->data; 3184 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3185 3186 baij->i = i; 3187 baij->j = j; 3188 baij->a = a; 3189 baij->singlemalloc = PETSC_FALSE; 3190 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3191 baij->free_a = PETSC_FALSE; 3192 baij->free_ij = PETSC_FALSE; 3193 3194 for (ii=0; ii<m; ii++) { 3195 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3196 #if defined(PETSC_USE_DEBUG) 3197 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3198 #endif 3199 } 3200 #if defined(PETSC_USE_DEBUG) 3201 for (ii=0; ii<baij->i[m]; ii++) { 3202 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3203 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3204 } 3205 #endif 3206 3207 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3208 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 PetscFunctionReturn(0); 3210 } 3211