1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "../src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "../src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 case 6: 110 for (i=0; i<mbs; i++) { 111 odiag = v + 36*diag_offset[i]; 112 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 115 diag += 36; 116 mdiag += 36; 117 } 118 break; 119 default: 120 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 121 } 122 a->idiagvalid = PETSC_TRUE; 123 PetscFunctionReturn(0); 124 } 125 EXTERN_C_END 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 129 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,idx; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 while (nz--) { 166 idx = (*vi++); 167 x1 = x[idx]; 168 s1 -= v[0]*x1; 169 v += 1; 170 } 171 x[i2] = idiag[0]*s1; 172 idiag += 1; 173 i2 += 1; 174 } 175 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 176 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 177 } 178 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 179 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 180 i2 = 0; 181 mdiag = a->idiag+a->mbs; 182 for (i=0; i<m; i++) { 183 x1 = x[i2]; 184 x[i2] = mdiag[0]*x1; 185 mdiag += 1; 186 i2 += 1; 187 } 188 ierr = PetscLogFlops(m);CHKERRQ(ierr); 189 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 190 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 191 } 192 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 193 idiag = a->idiag+a->mbs - 1; 194 i2 = m - 1; 195 x1 = x[i2]; 196 x[i2] = idiag[0]*x1; 197 idiag -= 1; 198 i2 -= 1; 199 for (i=m-2; i>=0; i--) { 200 v = aa + (diag[i]+1); 201 vi = aj + diag[i] + 1; 202 nz = ai[i+1] - diag[i] - 1; 203 s1 = x[i2]; 204 while (nz--) { 205 idx = (*vi++); 206 x1 = x[idx]; 207 s1 -= v[0]*x1; 208 v += 1; 209 } 210 x[i2] = idiag[0]*s1; 211 idiag -= 2; 212 i2 -= 1; 213 } 214 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 215 } 216 } else { 217 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 218 } 219 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 220 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 226 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 227 { 228 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 229 PetscScalar *x,x1,x2,s1,s2; 230 const PetscScalar *b; 231 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 232 PetscErrorCode ierr; 233 PetscInt m = a->mbs,i,i2,nz,idx; 234 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 235 236 PetscFunctionBegin; 237 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 238 its = its*lits; 239 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 240 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 241 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 242 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 243 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 244 245 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 246 247 diag = a->diag; 248 idiag = a->idiag; 249 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 250 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 251 252 if (flag & SOR_ZERO_INITIAL_GUESS) { 253 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 254 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 255 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 256 i2 = 2; 257 idiag += 4; 258 for (i=1; i<m; i++) { 259 v = aa + 4*ai[i]; 260 vi = aj + ai[i]; 261 nz = diag[i] - ai[i]; 262 s1 = b[i2]; s2 = b[i2+1]; 263 while (nz--) { 264 idx = 2*(*vi++); 265 x1 = x[idx]; x2 = x[1+idx]; 266 s1 -= v[0]*x1 + v[2]*x2; 267 s2 -= v[1]*x1 + v[3]*x2; 268 v += 4; 269 } 270 x[i2] = idiag[0]*s1 + idiag[2]*s2; 271 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 272 idiag += 4; 273 i2 += 2; 274 } 275 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 276 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 277 } 278 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 279 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 280 i2 = 0; 281 mdiag = a->idiag+4*a->mbs; 282 for (i=0; i<m; i++) { 283 x1 = x[i2]; x2 = x[i2+1]; 284 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 285 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 286 mdiag += 4; 287 i2 += 2; 288 } 289 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 290 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 291 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 292 } 293 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 294 idiag = a->idiag+4*a->mbs - 4; 295 i2 = 2*m - 2; 296 x1 = x[i2]; x2 = x[i2+1]; 297 x[i2] = idiag[0]*x1 + idiag[2]*x2; 298 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 299 idiag -= 4; 300 i2 -= 2; 301 for (i=m-2; i>=0; i--) { 302 v = aa + 4*(diag[i]+1); 303 vi = aj + diag[i] + 1; 304 nz = ai[i+1] - diag[i] - 1; 305 s1 = x[i2]; s2 = x[i2+1]; 306 while (nz--) { 307 idx = 2*(*vi++); 308 x1 = x[idx]; x2 = x[1+idx]; 309 s1 -= v[0]*x1 + v[2]*x2; 310 s2 -= v[1]*x1 + v[3]*x2; 311 v += 4; 312 } 313 x[i2] = idiag[0]*s1 + idiag[2]*s2; 314 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 315 idiag -= 4; 316 i2 -= 2; 317 } 318 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 330 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,s1,s2,s3; 334 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 335 const PetscScalar *b; 336 PetscErrorCode ierr; 337 PetscInt m = a->mbs,i,i2,nz,idx; 338 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 339 340 PetscFunctionBegin; 341 its = its*lits; 342 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 343 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 344 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 345 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 346 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 347 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 348 349 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 350 351 diag = a->diag; 352 idiag = a->idiag; 353 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 355 356 if (flag & SOR_ZERO_INITIAL_GUESS) { 357 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 358 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 359 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 360 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 361 i2 = 3; 362 idiag += 9; 363 for (i=1; i<m; i++) { 364 v = aa + 9*ai[i]; 365 vi = aj + ai[i]; 366 nz = diag[i] - ai[i]; 367 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 368 while (nz--) { 369 idx = 3*(*vi++); 370 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 371 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 372 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 373 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 374 v += 9; 375 } 376 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 377 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 378 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 379 idiag += 9; 380 i2 += 3; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+9*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 391 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 392 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 393 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 394 mdiag += 9; 395 i2 += 3; 396 } 397 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 398 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 399 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 400 } 401 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 402 idiag = a->idiag+9*a->mbs - 9; 403 i2 = 3*m - 3; 404 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 405 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 406 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 407 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 408 idiag -= 9; 409 i2 -= 3; 410 for (i=m-2; i>=0; i--) { 411 v = aa + 9*(diag[i]+1); 412 vi = aj + diag[i] + 1; 413 nz = ai[i+1] - diag[i] - 1; 414 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 415 while (nz--) { 416 idx = 3*(*vi++); 417 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 418 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 419 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 420 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 421 v += 9; 422 } 423 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 424 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 425 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 426 idiag -= 9; 427 i2 -= 3; 428 } 429 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 430 } 431 } else { 432 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 433 } 434 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 435 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 441 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 442 { 443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 444 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 445 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 446 const PetscScalar *b; 447 PetscErrorCode ierr; 448 PetscInt m = a->mbs,i,i2,nz,idx; 449 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 450 451 PetscFunctionBegin; 452 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 453 its = its*lits; 454 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 455 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 456 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 457 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 458 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 459 460 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 461 462 diag = a->diag; 463 idiag = a->idiag; 464 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 465 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 466 467 if (flag & SOR_ZERO_INITIAL_GUESS) { 468 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 469 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 470 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 471 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 472 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 473 i2 = 4; 474 idiag += 16; 475 for (i=1; i<m; i++) { 476 v = aa + 16*ai[i]; 477 vi = aj + ai[i]; 478 nz = diag[i] - ai[i]; 479 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 480 while (nz--) { 481 idx = 4*(*vi++); 482 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 483 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 484 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 485 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 486 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 487 v += 16; 488 } 489 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 490 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 491 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 492 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 493 idiag += 16; 494 i2 += 4; 495 } 496 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 497 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 498 } 499 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 500 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 501 i2 = 0; 502 mdiag = a->idiag+16*a->mbs; 503 for (i=0; i<m; i++) { 504 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 505 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 506 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 507 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 508 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 509 mdiag += 16; 510 i2 += 4; 511 } 512 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 513 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 514 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 515 } 516 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 517 idiag = a->idiag+16*a->mbs - 16; 518 i2 = 4*m - 4; 519 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 520 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 521 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 522 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 523 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 524 idiag -= 16; 525 i2 -= 4; 526 for (i=m-2; i>=0; i--) { 527 v = aa + 16*(diag[i]+1); 528 vi = aj + diag[i] + 1; 529 nz = ai[i+1] - diag[i] - 1; 530 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 531 while (nz--) { 532 idx = 4*(*vi++); 533 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 534 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 535 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 536 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 537 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 538 v += 16; 539 } 540 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 541 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 542 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 543 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 544 idiag -= 16; 545 i2 -= 4; 546 } 547 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 548 } 549 } else { 550 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 551 } 552 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 553 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 554 PetscFunctionReturn(0); 555 } 556 557 #undef __FUNCT__ 558 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 559 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 560 { 561 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 562 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 563 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 564 const PetscScalar *b; 565 PetscErrorCode ierr; 566 PetscInt m = a->mbs,i,i2,nz,idx; 567 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 568 569 PetscFunctionBegin; 570 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 571 its = its*lits; 572 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 573 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 574 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 575 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 576 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 577 578 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 579 580 diag = a->diag; 581 idiag = a->idiag; 582 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 583 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 584 585 if (flag & SOR_ZERO_INITIAL_GUESS) { 586 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 587 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 588 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 589 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 590 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 591 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 592 i2 = 5; 593 idiag += 25; 594 for (i=1; i<m; i++) { 595 v = aa + 25*ai[i]; 596 vi = aj + ai[i]; 597 nz = diag[i] - ai[i]; 598 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 599 while (nz--) { 600 idx = 5*(*vi++); 601 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 602 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 603 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 604 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 605 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 606 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 607 v += 25; 608 } 609 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 610 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 611 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 612 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 613 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 614 idiag += 25; 615 i2 += 5; 616 } 617 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 618 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 619 } 620 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 621 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 622 i2 = 0; 623 mdiag = a->idiag+25*a->mbs; 624 for (i=0; i<m; i++) { 625 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 626 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 627 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 628 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 629 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 630 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 631 mdiag += 25; 632 i2 += 5; 633 } 634 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 635 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 636 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 637 } 638 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 639 idiag = a->idiag+25*a->mbs - 25; 640 i2 = 5*m - 5; 641 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 642 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 643 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 644 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 645 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 646 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 647 idiag -= 25; 648 i2 -= 5; 649 for (i=m-2; i>=0; i--) { 650 v = aa + 25*(diag[i]+1); 651 vi = aj + diag[i] + 1; 652 nz = ai[i+1] - diag[i] - 1; 653 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 654 while (nz--) { 655 idx = 5*(*vi++); 656 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 657 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 658 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 659 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 660 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 661 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 662 v += 25; 663 } 664 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 665 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 666 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 667 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 668 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 669 idiag -= 25; 670 i2 -= 5; 671 } 672 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 673 } 674 } else { 675 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 676 } 677 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 678 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 684 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 685 { 686 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 687 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 688 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 689 const PetscScalar *b; 690 PetscErrorCode ierr; 691 PetscInt m = a->mbs,i,i2,nz,idx; 692 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 693 694 PetscFunctionBegin; 695 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 696 its = its*lits; 697 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 698 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 699 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 700 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 701 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 702 703 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 704 705 diag = a->diag; 706 idiag = a->idiag; 707 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 708 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 709 710 if (flag & SOR_ZERO_INITIAL_GUESS) { 711 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 712 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 713 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 714 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 715 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 716 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 717 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 718 i2 = 6; 719 idiag += 36; 720 for (i=1; i<m; i++) { 721 v = aa + 36*ai[i]; 722 vi = aj + ai[i]; 723 nz = diag[i] - ai[i]; 724 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 725 while (nz--) { 726 idx = 6*(*vi++); 727 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 728 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 729 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 730 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 731 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 732 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 733 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 734 v += 36; 735 } 736 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 737 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 738 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 739 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 740 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 741 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 742 idiag += 36; 743 i2 += 6; 744 } 745 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 746 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 747 } 748 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 749 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 750 i2 = 0; 751 mdiag = a->idiag+36*a->mbs; 752 for (i=0; i<m; i++) { 753 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 754 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 755 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 756 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 757 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 758 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 759 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 760 mdiag += 36; 761 i2 += 6; 762 } 763 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 764 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 765 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 766 } 767 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 768 idiag = a->idiag+36*a->mbs - 36; 769 i2 = 6*m - 6; 770 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 771 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 772 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 773 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 774 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 775 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 776 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 777 idiag -= 36; 778 i2 -= 6; 779 for (i=m-2; i>=0; i--) { 780 v = aa + 36*(diag[i]+1); 781 vi = aj + diag[i] + 1; 782 nz = ai[i+1] - diag[i] - 1; 783 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 784 while (nz--) { 785 idx = 6*(*vi++); 786 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 787 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 788 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 789 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 790 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 791 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 792 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 793 v += 36; 794 } 795 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 796 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 797 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 798 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 799 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 800 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 801 idiag -= 36; 802 i2 -= 6; 803 } 804 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 805 } 806 } else { 807 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 808 } 809 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 810 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 811 PetscFunctionReturn(0); 812 } 813 814 #undef __FUNCT__ 815 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 816 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 817 { 818 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 819 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 820 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 821 const PetscScalar *b; 822 PetscErrorCode ierr; 823 PetscInt m = a->mbs,i,i2,nz,idx; 824 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 825 826 PetscFunctionBegin; 827 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 828 its = its*lits; 829 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 830 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 831 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 832 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 833 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 834 835 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 836 837 diag = a->diag; 838 idiag = a->idiag; 839 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 840 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 841 842 if (flag & SOR_ZERO_INITIAL_GUESS) { 843 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 844 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 845 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 846 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 847 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 848 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 849 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 850 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 851 i2 = 7; 852 idiag += 49; 853 for (i=1; i<m; i++) { 854 v = aa + 49*ai[i]; 855 vi = aj + ai[i]; 856 nz = diag[i] - ai[i]; 857 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 858 while (nz--) { 859 idx = 7*(*vi++); 860 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 861 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 862 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 863 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 864 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 865 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 866 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 867 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 868 v += 49; 869 } 870 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 871 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 872 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 873 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 874 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 875 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 876 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 877 idiag += 49; 878 i2 += 7; 879 } 880 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 881 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 882 } 883 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 884 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 885 i2 = 0; 886 mdiag = a->idiag+49*a->mbs; 887 for (i=0; i<m; i++) { 888 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 889 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 890 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 891 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 892 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 893 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 894 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 895 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 896 mdiag += 36; 897 i2 += 6; 898 } 899 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 900 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 901 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 902 } 903 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 904 idiag = a->idiag+49*a->mbs - 49; 905 i2 = 7*m - 7; 906 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 907 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 908 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 909 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 910 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 911 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 912 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 913 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 914 idiag -= 49; 915 i2 -= 7; 916 for (i=m-2; i>=0; i--) { 917 v = aa + 49*(diag[i]+1); 918 vi = aj + diag[i] + 1; 919 nz = ai[i+1] - diag[i] - 1; 920 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 921 while (nz--) { 922 idx = 7*(*vi++); 923 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 924 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 925 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 926 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 927 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 928 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 929 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 930 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 931 v += 49; 932 } 933 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 934 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 935 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 936 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 937 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 938 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 939 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 940 idiag -= 49; 941 i2 -= 7; 942 } 943 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 944 } 945 } else { 946 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 947 } 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 /* 954 Special version for direct calls from Fortran (Used in PETSc-fun3d) 955 */ 956 #if defined(PETSC_HAVE_FORTRAN_CAPS) 957 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 958 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 959 #define matsetvaluesblocked4_ matsetvaluesblocked4 960 #endif 961 962 EXTERN_C_BEGIN 963 #undef __FUNCT__ 964 #define __FUNCT__ "matsetvaluesblocked4_" 965 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 966 { 967 Mat A = *AA; 968 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 969 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 970 PetscInt *ai=a->i,*ailen=a->ilen; 971 PetscInt *aj=a->j,stepval,lastcol = -1; 972 const PetscScalar *value = v; 973 MatScalar *ap,*aa = a->a,*bap; 974 975 PetscFunctionBegin; 976 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 977 stepval = (n-1)*4; 978 for (k=0; k<m; k++) { /* loop over added rows */ 979 row = im[k]; 980 rp = aj + ai[row]; 981 ap = aa + 16*ai[row]; 982 nrow = ailen[row]; 983 low = 0; 984 high = nrow; 985 for (l=0; l<n; l++) { /* loop over added columns */ 986 col = in[l]; 987 if (col <= lastcol) low = 0; else high = nrow; 988 lastcol = col; 989 value = v + k*(stepval+4 + l)*4; 990 while (high-low > 7) { 991 t = (low+high)/2; 992 if (rp[t] > col) high = t; 993 else low = t; 994 } 995 for (i=low; i<high; i++) { 996 if (rp[i] > col) break; 997 if (rp[i] == col) { 998 bap = ap + 16*i; 999 for (ii=0; ii<4; ii++,value+=stepval) { 1000 for (jj=ii; jj<16; jj+=4) { 1001 bap[jj] += *value++; 1002 } 1003 } 1004 goto noinsert2; 1005 } 1006 } 1007 N = nrow++ - 1; 1008 high++; /* added new column index thus must search to one higher than before */ 1009 /* shift up all the later entries in this row */ 1010 for (ii=N; ii>=i; ii--) { 1011 rp[ii+1] = rp[ii]; 1012 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1013 } 1014 if (N >= i) { 1015 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1016 } 1017 rp[i] = col; 1018 bap = ap + 16*i; 1019 for (ii=0; ii<4; ii++,value+=stepval) { 1020 for (jj=ii; jj<16; jj+=4) { 1021 bap[jj] = *value++; 1022 } 1023 } 1024 noinsert2:; 1025 low = i; 1026 } 1027 ailen[row] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 EXTERN_C_END 1032 1033 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1034 #define matsetvalues4_ MATSETVALUES4 1035 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1036 #define matsetvalues4_ matsetvalues4 1037 #endif 1038 1039 EXTERN_C_BEGIN 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatSetValues4_" 1042 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1043 { 1044 Mat A = *AA; 1045 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1046 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1047 PetscInt *ai=a->i,*ailen=a->ilen; 1048 PetscInt *aj=a->j,brow,bcol; 1049 PetscInt ridx,cidx,lastcol = -1; 1050 MatScalar *ap,value,*aa=a->a,*bap; 1051 1052 PetscFunctionBegin; 1053 for (k=0; k<m; k++) { /* loop over added rows */ 1054 row = im[k]; brow = row/4; 1055 rp = aj + ai[brow]; 1056 ap = aa + 16*ai[brow]; 1057 nrow = ailen[brow]; 1058 low = 0; 1059 high = nrow; 1060 for (l=0; l<n; l++) { /* loop over added columns */ 1061 col = in[l]; bcol = col/4; 1062 ridx = row % 4; cidx = col % 4; 1063 value = v[l + k*n]; 1064 if (col <= lastcol) low = 0; else high = nrow; 1065 lastcol = col; 1066 while (high-low > 7) { 1067 t = (low+high)/2; 1068 if (rp[t] > bcol) high = t; 1069 else low = t; 1070 } 1071 for (i=low; i<high; i++) { 1072 if (rp[i] > bcol) break; 1073 if (rp[i] == bcol) { 1074 bap = ap + 16*i + 4*cidx + ridx; 1075 *bap += value; 1076 goto noinsert1; 1077 } 1078 } 1079 N = nrow++ - 1; 1080 high++; /* added new column thus must search to one higher than before */ 1081 /* shift up all the later entries in this row */ 1082 for (ii=N; ii>=i; ii--) { 1083 rp[ii+1] = rp[ii]; 1084 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1085 } 1086 if (N>=i) { 1087 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1088 } 1089 rp[i] = bcol; 1090 ap[16*i + 4*cidx + ridx] = value; 1091 noinsert1:; 1092 low = i; 1093 } 1094 ailen[brow] = nrow; 1095 } 1096 PetscFunctionReturnVoid(); 1097 } 1098 EXTERN_C_END 1099 1100 /* 1101 Checks for missing diagonals 1102 */ 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1105 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1106 { 1107 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 PetscInt *diag,*jj = a->j,i; 1110 1111 PetscFunctionBegin; 1112 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1113 *missing = PETSC_FALSE; 1114 if (A->rmap->n > 0 && !jj) { 1115 *missing = PETSC_TRUE; 1116 if (d) *d = 0; 1117 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1118 } else { 1119 diag = a->diag; 1120 for (i=0; i<a->mbs; i++) { 1121 if (jj[diag[i]] != i) { 1122 *missing = PETSC_TRUE; 1123 if (d) *d = i; 1124 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1125 } 1126 } 1127 } 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1133 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1134 { 1135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1136 PetscErrorCode ierr; 1137 PetscInt i,j,m = a->mbs; 1138 1139 PetscFunctionBegin; 1140 if (!a->diag) { 1141 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1142 } 1143 for (i=0; i<m; i++) { 1144 a->diag[i] = a->i[i+1]; 1145 for (j=a->i[i]; j<a->i[i+1]; j++) { 1146 if (a->j[j] == i) { 1147 a->diag[i] = j; 1148 break; 1149 } 1150 } 1151 } 1152 PetscFunctionReturn(0); 1153 } 1154 1155 1156 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1160 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1161 { 1162 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1163 PetscErrorCode ierr; 1164 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1165 PetscInt *tia, *tja; 1166 1167 PetscFunctionBegin; 1168 *nn = n; 1169 if (!ia) PetscFunctionReturn(0); 1170 if (symmetric) { 1171 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1172 } else { 1173 tia = a->i; tja = a->j; 1174 } 1175 1176 if (!blockcompressed && bs > 1) { 1177 (*nn) *= bs; 1178 nbs = bs; 1179 /* malloc & create the natural set of indices */ 1180 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1181 if (n) { 1182 (*ia)[0] = 0; 1183 for (j=1; j<bs; j++) { 1184 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1185 } 1186 } 1187 1188 for (i=1; i<n; i++) { 1189 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1190 for (j=1; j<bs; j++) { 1191 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1192 } 1193 } 1194 if (n) { 1195 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1196 } 1197 1198 if (ja) { 1199 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1200 cnt = 0; 1201 for (i=0; i<n; i++) { 1202 for (j=0; j<bs; j++) { 1203 for (k=tia[i]; k<tia[i+1]; k++) { 1204 for (l=0; l<bs; l++) { 1205 (*ja)[cnt++] = bs*tja[k] + l; 1206 } 1207 } 1208 } 1209 } 1210 } 1211 1212 n *= bs; 1213 nz *= bs*bs; 1214 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1215 ierr = PetscFree(tia);CHKERRQ(ierr); 1216 ierr = PetscFree(tja);CHKERRQ(ierr); 1217 } 1218 } else { 1219 *ia = tia; 1220 if (ja) *ja = tja; 1221 } 1222 if (oshift == 1) { 1223 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1224 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1231 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1232 { 1233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1234 PetscErrorCode ierr; 1235 PetscInt i,n = a->mbs,nz = a->i[n]; 1236 1237 PetscFunctionBegin; 1238 if (!ia) PetscFunctionReturn(0); 1239 if (!blockcompressed && A->rmap->bs > 1) { 1240 ierr = PetscFree(*ia);CHKERRQ(ierr); 1241 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1242 } else if (symmetric) { 1243 ierr = PetscFree(*ia);CHKERRQ(ierr); 1244 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1245 } else if (oshift == 1) { /* blockcompressed */ 1246 for (i=0; i<n+1; i++) a->i[i]--; 1247 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1254 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1255 { 1256 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 #if defined(PETSC_USE_LOG) 1261 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1262 #endif 1263 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1264 if (a->row) { 1265 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1266 } 1267 if (a->col) { 1268 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1269 } 1270 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1271 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1272 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1273 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1274 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1275 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1276 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1277 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1278 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1279 1280 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1281 ierr = PetscFree(a);CHKERRQ(ierr); 1282 1283 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1297 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1298 { 1299 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1300 PetscErrorCode ierr; 1301 1302 PetscFunctionBegin; 1303 switch (op) { 1304 case MAT_ROW_ORIENTED: 1305 a->roworiented = flg; 1306 break; 1307 case MAT_KEEP_NONZERO_PATTERN: 1308 a->keepnonzeropattern = flg; 1309 break; 1310 case MAT_NEW_NONZERO_LOCATIONS: 1311 a->nonew = (flg ? 0 : 1); 1312 break; 1313 case MAT_NEW_NONZERO_LOCATION_ERR: 1314 a->nonew = (flg ? -1 : 0); 1315 break; 1316 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1317 a->nonew = (flg ? -2 : 0); 1318 break; 1319 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1320 a->nounused = (flg ? -1 : 0); 1321 break; 1322 case MAT_NEW_DIAGONALS: 1323 case MAT_IGNORE_OFF_PROC_ENTRIES: 1324 case MAT_USE_HASH_TABLE: 1325 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1326 break; 1327 case MAT_SYMMETRIC: 1328 case MAT_STRUCTURALLY_SYMMETRIC: 1329 case MAT_HERMITIAN: 1330 case MAT_SYMMETRY_ETERNAL: 1331 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1332 break; 1333 default: 1334 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1335 } 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1341 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1344 PetscErrorCode ierr; 1345 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1346 MatScalar *aa,*aa_i; 1347 PetscScalar *v_i; 1348 1349 PetscFunctionBegin; 1350 bs = A->rmap->bs; 1351 ai = a->i; 1352 aj = a->j; 1353 aa = a->a; 1354 bs2 = a->bs2; 1355 1356 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1357 1358 bn = row/bs; /* Block number */ 1359 bp = row % bs; /* Block Position */ 1360 M = ai[bn+1] - ai[bn]; 1361 *nz = bs*M; 1362 1363 if (v) { 1364 *v = 0; 1365 if (*nz) { 1366 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1367 for (i=0; i<M; i++) { /* for each block in the block row */ 1368 v_i = *v + i*bs; 1369 aa_i = aa + bs2*(ai[bn] + i); 1370 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1371 } 1372 } 1373 } 1374 1375 if (idx) { 1376 *idx = 0; 1377 if (*nz) { 1378 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1379 for (i=0; i<M; i++) { /* for each block in the block row */ 1380 idx_i = *idx + i*bs; 1381 itmp = bs*aj[ai[bn] + i]; 1382 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1383 } 1384 } 1385 } 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1391 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1392 { 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1397 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1403 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1404 { 1405 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1406 Mat C; 1407 PetscErrorCode ierr; 1408 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1409 PetscInt *rows,*cols,bs2=a->bs2; 1410 MatScalar *array; 1411 1412 PetscFunctionBegin; 1413 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1414 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1415 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1416 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1417 1418 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1419 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1420 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1421 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1422 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1423 ierr = PetscFree(col);CHKERRQ(ierr); 1424 } else { 1425 C = *B; 1426 } 1427 1428 array = a->a; 1429 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1430 cols = rows + bs; 1431 for (i=0; i<mbs; i++) { 1432 cols[0] = i*bs; 1433 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1434 len = ai[i+1] - ai[i]; 1435 for (j=0; j<len; j++) { 1436 rows[0] = (*aj++)*bs; 1437 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1438 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1439 array += bs2; 1440 } 1441 } 1442 ierr = PetscFree(rows);CHKERRQ(ierr); 1443 1444 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1445 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 1447 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1448 *B = C; 1449 } else { 1450 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1457 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1458 { 1459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1460 PetscErrorCode ierr; 1461 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1462 int fd; 1463 PetscScalar *aa; 1464 FILE *file; 1465 1466 PetscFunctionBegin; 1467 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1468 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1469 col_lens[0] = MAT_FILE_COOKIE; 1470 1471 col_lens[1] = A->rmap->N; 1472 col_lens[2] = A->cmap->n; 1473 col_lens[3] = a->nz*bs2; 1474 1475 /* store lengths of each row and write (including header) to file */ 1476 count = 0; 1477 for (i=0; i<a->mbs; i++) { 1478 for (j=0; j<bs; j++) { 1479 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1480 } 1481 } 1482 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1483 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1484 1485 /* store column indices (zero start index) */ 1486 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1487 count = 0; 1488 for (i=0; i<a->mbs; i++) { 1489 for (j=0; j<bs; j++) { 1490 for (k=a->i[i]; k<a->i[i+1]; k++) { 1491 for (l=0; l<bs; l++) { 1492 jj[count++] = bs*a->j[k] + l; 1493 } 1494 } 1495 } 1496 } 1497 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1498 ierr = PetscFree(jj);CHKERRQ(ierr); 1499 1500 /* store nonzero values */ 1501 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1502 count = 0; 1503 for (i=0; i<a->mbs; i++) { 1504 for (j=0; j<bs; j++) { 1505 for (k=a->i[i]; k<a->i[i+1]; k++) { 1506 for (l=0; l<bs; l++) { 1507 aa[count++] = a->a[bs2*k + l*bs + j]; 1508 } 1509 } 1510 } 1511 } 1512 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1513 ierr = PetscFree(aa);CHKERRQ(ierr); 1514 1515 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1516 if (file) { 1517 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1518 } 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1524 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1525 { 1526 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1527 PetscErrorCode ierr; 1528 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1529 PetscViewerFormat format; 1530 1531 PetscFunctionBegin; 1532 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1533 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1534 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1535 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1536 Mat aij; 1537 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1538 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1539 ierr = MatDestroy(aij);CHKERRQ(ierr); 1540 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1541 PetscFunctionReturn(0); 1542 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1543 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1544 for (i=0; i<a->mbs; i++) { 1545 for (j=0; j<bs; j++) { 1546 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1547 for (k=a->i[i]; k<a->i[i+1]; k++) { 1548 for (l=0; l<bs; l++) { 1549 #if defined(PETSC_USE_COMPLEX) 1550 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1552 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1553 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1554 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1555 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1556 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1557 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1558 } 1559 #else 1560 if (a->a[bs2*k + l*bs + j] != 0.0) { 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1562 } 1563 #endif 1564 } 1565 } 1566 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1567 } 1568 } 1569 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1570 } else { 1571 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1572 for (i=0; i<a->mbs; i++) { 1573 for (j=0; j<bs; j++) { 1574 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1575 for (k=a->i[i]; k<a->i[i+1]; k++) { 1576 for (l=0; l<bs; l++) { 1577 #if defined(PETSC_USE_COMPLEX) 1578 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1579 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1580 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1581 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1582 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1583 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1584 } else { 1585 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1586 } 1587 #else 1588 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1589 #endif 1590 } 1591 } 1592 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1593 } 1594 } 1595 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1596 } 1597 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1603 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1604 { 1605 Mat A = (Mat) Aa; 1606 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1607 PetscErrorCode ierr; 1608 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1609 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1610 MatScalar *aa; 1611 PetscViewer viewer; 1612 1613 PetscFunctionBegin; 1614 1615 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1616 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1617 1618 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1619 1620 /* loop over matrix elements drawing boxes */ 1621 color = PETSC_DRAW_BLUE; 1622 for (i=0,row=0; i<mbs; i++,row+=bs) { 1623 for (j=a->i[i]; j<a->i[i+1]; j++) { 1624 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1625 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1626 aa = a->a + j*bs2; 1627 for (k=0; k<bs; k++) { 1628 for (l=0; l<bs; l++) { 1629 if (PetscRealPart(*aa++) >= 0.) continue; 1630 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1631 } 1632 } 1633 } 1634 } 1635 color = PETSC_DRAW_CYAN; 1636 for (i=0,row=0; i<mbs; i++,row+=bs) { 1637 for (j=a->i[i]; j<a->i[i+1]; j++) { 1638 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1639 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1640 aa = a->a + j*bs2; 1641 for (k=0; k<bs; k++) { 1642 for (l=0; l<bs; l++) { 1643 if (PetscRealPart(*aa++) != 0.) continue; 1644 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1645 } 1646 } 1647 } 1648 } 1649 1650 color = PETSC_DRAW_RED; 1651 for (i=0,row=0; i<mbs; i++,row+=bs) { 1652 for (j=a->i[i]; j<a->i[i+1]; j++) { 1653 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1654 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1655 aa = a->a + j*bs2; 1656 for (k=0; k<bs; k++) { 1657 for (l=0; l<bs; l++) { 1658 if (PetscRealPart(*aa++) <= 0.) continue; 1659 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1660 } 1661 } 1662 } 1663 } 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1669 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1670 { 1671 PetscErrorCode ierr; 1672 PetscReal xl,yl,xr,yr,w,h; 1673 PetscDraw draw; 1674 PetscTruth isnull; 1675 1676 PetscFunctionBegin; 1677 1678 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1679 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1680 1681 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1682 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1683 xr += w; yr += h; xl = -w; yl = -h; 1684 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1685 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1686 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatView_SeqBAIJ" 1692 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1693 { 1694 PetscErrorCode ierr; 1695 PetscTruth iascii,isbinary,isdraw; 1696 1697 PetscFunctionBegin; 1698 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1699 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1700 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1701 if (iascii){ 1702 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1703 } else if (isbinary) { 1704 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1705 } else if (isdraw) { 1706 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1707 } else { 1708 Mat B; 1709 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1710 ierr = MatView(B,viewer);CHKERRQ(ierr); 1711 ierr = MatDestroy(B);CHKERRQ(ierr); 1712 } 1713 PetscFunctionReturn(0); 1714 } 1715 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1719 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1720 { 1721 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1722 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1723 PetscInt *ai = a->i,*ailen = a->ilen; 1724 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1725 MatScalar *ap,*aa = a->a; 1726 1727 PetscFunctionBegin; 1728 for (k=0; k<m; k++) { /* loop over rows */ 1729 row = im[k]; brow = row/bs; 1730 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1731 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1732 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1733 nrow = ailen[brow]; 1734 for (l=0; l<n; l++) { /* loop over columns */ 1735 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1736 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1737 col = in[l] ; 1738 bcol = col/bs; 1739 cidx = col%bs; 1740 ridx = row%bs; 1741 high = nrow; 1742 low = 0; /* assume unsorted */ 1743 while (high-low > 5) { 1744 t = (low+high)/2; 1745 if (rp[t] > bcol) high = t; 1746 else low = t; 1747 } 1748 for (i=low; i<high; i++) { 1749 if (rp[i] > bcol) break; 1750 if (rp[i] == bcol) { 1751 *v++ = ap[bs2*i+bs*cidx+ridx]; 1752 goto finished; 1753 } 1754 } 1755 *v++ = 0.0; 1756 finished:; 1757 } 1758 } 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #define CHUNKSIZE 10 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1765 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1766 { 1767 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1768 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1769 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1770 PetscErrorCode ierr; 1771 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1772 PetscTruth roworiented=a->roworiented; 1773 const PetscScalar *value = v; 1774 MatScalar *ap,*aa = a->a,*bap; 1775 1776 PetscFunctionBegin; 1777 if (roworiented) { 1778 stepval = (n-1)*bs; 1779 } else { 1780 stepval = (m-1)*bs; 1781 } 1782 for (k=0; k<m; k++) { /* loop over added rows */ 1783 row = im[k]; 1784 if (row < 0) continue; 1785 #if defined(PETSC_USE_DEBUG) 1786 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1787 #endif 1788 rp = aj + ai[row]; 1789 ap = aa + bs2*ai[row]; 1790 rmax = imax[row]; 1791 nrow = ailen[row]; 1792 low = 0; 1793 high = nrow; 1794 for (l=0; l<n; l++) { /* loop over added columns */ 1795 if (in[l] < 0) continue; 1796 #if defined(PETSC_USE_DEBUG) 1797 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1798 #endif 1799 col = in[l]; 1800 if (roworiented) { 1801 value = v + k*(stepval+bs)*bs + l*bs; 1802 } else { 1803 value = v + l*(stepval+bs)*bs + k*bs; 1804 } 1805 if (col <= lastcol) low = 0; else high = nrow; 1806 lastcol = col; 1807 while (high-low > 7) { 1808 t = (low+high)/2; 1809 if (rp[t] > col) high = t; 1810 else low = t; 1811 } 1812 for (i=low; i<high; i++) { 1813 if (rp[i] > col) break; 1814 if (rp[i] == col) { 1815 bap = ap + bs2*i; 1816 if (roworiented) { 1817 if (is == ADD_VALUES) { 1818 for (ii=0; ii<bs; ii++,value+=stepval) { 1819 for (jj=ii; jj<bs2; jj+=bs) { 1820 bap[jj] += *value++; 1821 } 1822 } 1823 } else { 1824 for (ii=0; ii<bs; ii++,value+=stepval) { 1825 for (jj=ii; jj<bs2; jj+=bs) { 1826 bap[jj] = *value++; 1827 } 1828 } 1829 } 1830 } else { 1831 if (is == ADD_VALUES) { 1832 for (ii=0; ii<bs; ii++,value+=stepval) { 1833 for (jj=0; jj<bs; jj++) { 1834 *bap++ += *value++; 1835 } 1836 } 1837 } else { 1838 for (ii=0; ii<bs; ii++,value+=stepval) { 1839 for (jj=0; jj<bs; jj++) { 1840 *bap++ = *value++; 1841 } 1842 } 1843 } 1844 } 1845 goto noinsert2; 1846 } 1847 } 1848 if (nonew == 1) goto noinsert2; 1849 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1850 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1851 N = nrow++ - 1; high++; 1852 /* shift up all the later entries in this row */ 1853 for (ii=N; ii>=i; ii--) { 1854 rp[ii+1] = rp[ii]; 1855 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1856 } 1857 if (N >= i) { 1858 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1859 } 1860 rp[i] = col; 1861 bap = ap + bs2*i; 1862 if (roworiented) { 1863 for (ii=0; ii<bs; ii++,value+=stepval) { 1864 for (jj=ii; jj<bs2; jj+=bs) { 1865 bap[jj] = *value++; 1866 } 1867 } 1868 } else { 1869 for (ii=0; ii<bs; ii++,value+=stepval) { 1870 for (jj=0; jj<bs; jj++) { 1871 *bap++ = *value++; 1872 } 1873 } 1874 } 1875 noinsert2:; 1876 low = i; 1877 } 1878 ailen[row] = nrow; 1879 } 1880 PetscFunctionReturn(0); 1881 } 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1885 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1886 { 1887 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1888 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1889 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1890 PetscErrorCode ierr; 1891 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1892 MatScalar *aa = a->a,*ap; 1893 PetscReal ratio=0.6; 1894 1895 PetscFunctionBegin; 1896 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1897 1898 if (m) rmax = ailen[0]; 1899 for (i=1; i<mbs; i++) { 1900 /* move each row back by the amount of empty slots (fshift) before it*/ 1901 fshift += imax[i-1] - ailen[i-1]; 1902 rmax = PetscMax(rmax,ailen[i]); 1903 if (fshift) { 1904 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1905 N = ailen[i]; 1906 for (j=0; j<N; j++) { 1907 ip[j-fshift] = ip[j]; 1908 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1909 } 1910 } 1911 ai[i] = ai[i-1] + ailen[i-1]; 1912 } 1913 if (mbs) { 1914 fshift += imax[mbs-1] - ailen[mbs-1]; 1915 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1916 } 1917 /* reset ilen and imax for each row */ 1918 for (i=0; i<mbs; i++) { 1919 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1920 } 1921 a->nz = ai[mbs]; 1922 1923 /* diagonals may have moved, so kill the diagonal pointers */ 1924 a->idiagvalid = PETSC_FALSE; 1925 if (fshift && a->diag) { 1926 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1927 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1928 a->diag = 0; 1929 } 1930 if (fshift && a->nounused == -1) { 1931 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1932 } 1933 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1934 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1935 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1936 a->reallocs = 0; 1937 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1938 1939 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1940 if (a->compressedrow.use){ 1941 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1942 } 1943 1944 A->same_nonzero = PETSC_TRUE; 1945 PetscFunctionReturn(0); 1946 } 1947 1948 /* 1949 This function returns an array of flags which indicate the locations of contiguous 1950 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1951 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1952 Assume: sizes should be long enough to hold all the values. 1953 */ 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1956 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1957 { 1958 PetscInt i,j,k,row; 1959 PetscTruth flg; 1960 1961 PetscFunctionBegin; 1962 for (i=0,j=0; i<n; j++) { 1963 row = idx[i]; 1964 if (row%bs!=0) { /* Not the begining of a block */ 1965 sizes[j] = 1; 1966 i++; 1967 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1968 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1969 i++; 1970 } else { /* Begining of the block, so check if the complete block exists */ 1971 flg = PETSC_TRUE; 1972 for (k=1; k<bs; k++) { 1973 if (row+k != idx[i+k]) { /* break in the block */ 1974 flg = PETSC_FALSE; 1975 break; 1976 } 1977 } 1978 if (flg) { /* No break in the bs */ 1979 sizes[j] = bs; 1980 i+= bs; 1981 } else { 1982 sizes[j] = 1; 1983 i++; 1984 } 1985 } 1986 } 1987 *bs_max = j; 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1993 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1994 { 1995 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1996 PetscErrorCode ierr; 1997 PetscInt i,j,k,count,*rows; 1998 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1999 PetscScalar zero = 0.0; 2000 MatScalar *aa; 2001 2002 PetscFunctionBegin; 2003 /* Make a copy of the IS and sort it */ 2004 /* allocate memory for rows,sizes */ 2005 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2006 sizes = rows + is_n; 2007 2008 /* copy IS values to rows, and sort them */ 2009 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2010 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2011 if (baij->keepnonzeropattern) { 2012 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2013 bs_max = is_n; 2014 A->same_nonzero = PETSC_TRUE; 2015 } else { 2016 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2017 A->same_nonzero = PETSC_FALSE; 2018 } 2019 2020 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2021 row = rows[j]; 2022 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2023 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2024 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2025 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2026 if (diag != 0.0) { 2027 if (baij->ilen[row/bs] > 0) { 2028 baij->ilen[row/bs] = 1; 2029 baij->j[baij->i[row/bs]] = row/bs; 2030 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2031 } 2032 /* Now insert all the diagonal values for this bs */ 2033 for (k=0; k<bs; k++) { 2034 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2035 } 2036 } else { /* (diag == 0.0) */ 2037 baij->ilen[row/bs] = 0; 2038 } /* end (diag == 0.0) */ 2039 } else { /* (sizes[i] != bs) */ 2040 #if defined (PETSC_USE_DEBUG) 2041 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2042 #endif 2043 for (k=0; k<count; k++) { 2044 aa[0] = zero; 2045 aa += bs; 2046 } 2047 if (diag != 0.0) { 2048 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2049 } 2050 } 2051 } 2052 2053 ierr = PetscFree(rows);CHKERRQ(ierr); 2054 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2060 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2061 { 2062 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2063 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2064 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2065 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2066 PetscErrorCode ierr; 2067 PetscInt ridx,cidx,bs2=a->bs2; 2068 PetscTruth roworiented=a->roworiented; 2069 MatScalar *ap,value,*aa=a->a,*bap; 2070 2071 PetscFunctionBegin; 2072 for (k=0; k<m; k++) { /* loop over added rows */ 2073 row = im[k]; 2074 brow = row/bs; 2075 if (row < 0) continue; 2076 #if defined(PETSC_USE_DEBUG) 2077 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2078 #endif 2079 rp = aj + ai[brow]; 2080 ap = aa + bs2*ai[brow]; 2081 rmax = imax[brow]; 2082 nrow = ailen[brow]; 2083 low = 0; 2084 high = nrow; 2085 for (l=0; l<n; l++) { /* loop over added columns */ 2086 if (in[l] < 0) continue; 2087 #if defined(PETSC_USE_DEBUG) 2088 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2089 #endif 2090 col = in[l]; bcol = col/bs; 2091 ridx = row % bs; cidx = col % bs; 2092 if (roworiented) { 2093 value = v[l + k*n]; 2094 } else { 2095 value = v[k + l*m]; 2096 } 2097 if (col <= lastcol) low = 0; else high = nrow; 2098 lastcol = col; 2099 while (high-low > 7) { 2100 t = (low+high)/2; 2101 if (rp[t] > bcol) high = t; 2102 else low = t; 2103 } 2104 for (i=low; i<high; i++) { 2105 if (rp[i] > bcol) break; 2106 if (rp[i] == bcol) { 2107 bap = ap + bs2*i + bs*cidx + ridx; 2108 if (is == ADD_VALUES) *bap += value; 2109 else *bap = value; 2110 goto noinsert1; 2111 } 2112 } 2113 if (nonew == 1) goto noinsert1; 2114 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2115 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2116 N = nrow++ - 1; high++; 2117 /* shift up all the later entries in this row */ 2118 for (ii=N; ii>=i; ii--) { 2119 rp[ii+1] = rp[ii]; 2120 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2121 } 2122 if (N>=i) { 2123 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2124 } 2125 rp[i] = bcol; 2126 ap[bs2*i + bs*cidx + ridx] = value; 2127 a->nz++; 2128 noinsert1:; 2129 low = i; 2130 } 2131 ailen[brow] = nrow; 2132 } 2133 A->same_nonzero = PETSC_FALSE; 2134 PetscFunctionReturn(0); 2135 } 2136 2137 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2138 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2141 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2142 { 2143 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2144 Mat outA; 2145 PetscErrorCode ierr; 2146 PetscTruth row_identity,col_identity; 2147 2148 PetscFunctionBegin; 2149 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2150 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2151 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2152 if (!row_identity || !col_identity) { 2153 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2154 } 2155 2156 outA = inA; 2157 inA->factor = MAT_FACTOR_LU; 2158 2159 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2160 2161 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2162 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2163 a->row = row; 2164 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2165 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2166 a->col = col; 2167 2168 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2169 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2170 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2171 2172 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2173 if (!a->solve_work) { 2174 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2175 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2176 } 2177 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2178 2179 PetscFunctionReturn(0); 2180 } 2181 2182 EXTERN_C_BEGIN 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2185 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2186 { 2187 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2188 PetscInt i,nz,mbs; 2189 2190 PetscFunctionBegin; 2191 nz = baij->maxnz/baij->bs2; 2192 mbs = baij->mbs; 2193 for (i=0; i<nz; i++) { 2194 baij->j[i] = indices[i]; 2195 } 2196 baij->nz = nz; 2197 for (i=0; i<mbs; i++) { 2198 baij->ilen[i] = baij->imax[i]; 2199 } 2200 PetscFunctionReturn(0); 2201 } 2202 EXTERN_C_END 2203 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2206 /*@ 2207 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2208 in the matrix. 2209 2210 Input Parameters: 2211 + mat - the SeqBAIJ matrix 2212 - indices - the column indices 2213 2214 Level: advanced 2215 2216 Notes: 2217 This can be called if you have precomputed the nonzero structure of the 2218 matrix and want to provide it to the matrix object to improve the performance 2219 of the MatSetValues() operation. 2220 2221 You MUST have set the correct numbers of nonzeros per row in the call to 2222 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2223 2224 MUST be called before any calls to MatSetValues(); 2225 2226 @*/ 2227 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2228 { 2229 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2230 2231 PetscFunctionBegin; 2232 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2233 PetscValidPointer(indices,2); 2234 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2235 if (f) { 2236 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2237 } else { 2238 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2239 } 2240 PetscFunctionReturn(0); 2241 } 2242 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2245 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2246 { 2247 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2248 PetscErrorCode ierr; 2249 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2250 PetscReal atmp; 2251 PetscScalar *x,zero = 0.0; 2252 MatScalar *aa; 2253 PetscInt ncols,brow,krow,kcol; 2254 2255 PetscFunctionBegin; 2256 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2257 bs = A->rmap->bs; 2258 aa = a->a; 2259 ai = a->i; 2260 aj = a->j; 2261 mbs = a->mbs; 2262 2263 ierr = VecSet(v,zero);CHKERRQ(ierr); 2264 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2265 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2266 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2267 for (i=0; i<mbs; i++) { 2268 ncols = ai[1] - ai[0]; ai++; 2269 brow = bs*i; 2270 for (j=0; j<ncols; j++){ 2271 for (kcol=0; kcol<bs; kcol++){ 2272 for (krow=0; krow<bs; krow++){ 2273 atmp = PetscAbsScalar(*aa);aa++; 2274 row = brow + krow; /* row index */ 2275 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2276 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2277 } 2278 } 2279 aj++; 2280 } 2281 } 2282 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2283 PetscFunctionReturn(0); 2284 } 2285 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "MatCopy_SeqBAIJ" 2288 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2289 { 2290 PetscErrorCode ierr; 2291 2292 PetscFunctionBegin; 2293 /* If the two matrices have the same copy implementation, use fast copy. */ 2294 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2295 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2296 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2297 2298 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2299 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2300 } 2301 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2302 } else { 2303 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2304 } 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #undef __FUNCT__ 2309 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2310 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2311 { 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2321 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2322 { 2323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2324 PetscFunctionBegin; 2325 *array = a->a; 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2331 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2332 { 2333 PetscFunctionBegin; 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #include "petscblaslapack.h" 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2340 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2341 { 2342 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2343 PetscErrorCode ierr; 2344 PetscInt i,bs=Y->rmap->bs,j,bs2; 2345 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2346 2347 PetscFunctionBegin; 2348 if (str == SAME_NONZERO_PATTERN) { 2349 PetscScalar alpha = a; 2350 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2351 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2352 if (y->xtoy && y->XtoY != X) { 2353 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2354 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2355 } 2356 if (!y->xtoy) { /* get xtoy */ 2357 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2358 y->XtoY = X; 2359 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2360 } 2361 bs2 = bs*bs; 2362 for (i=0; i<x->nz; i++) { 2363 j = 0; 2364 while (j < bs2){ 2365 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2366 j++; 2367 } 2368 } 2369 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2370 } else { 2371 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2372 } 2373 PetscFunctionReturn(0); 2374 } 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2378 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2379 { 2380 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2381 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2382 MatScalar *aa = a->a; 2383 2384 PetscFunctionBegin; 2385 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2391 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2392 { 2393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2394 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2395 MatScalar *aa = a->a; 2396 2397 PetscFunctionBegin; 2398 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2399 PetscFunctionReturn(0); 2400 } 2401 2402 2403 /* -------------------------------------------------------------------*/ 2404 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2405 MatGetRow_SeqBAIJ, 2406 MatRestoreRow_SeqBAIJ, 2407 MatMult_SeqBAIJ_N, 2408 /* 4*/ MatMultAdd_SeqBAIJ_N, 2409 MatMultTranspose_SeqBAIJ, 2410 MatMultTransposeAdd_SeqBAIJ, 2411 0, 2412 0, 2413 0, 2414 /*10*/ 0, 2415 MatLUFactor_SeqBAIJ, 2416 0, 2417 0, 2418 MatTranspose_SeqBAIJ, 2419 /*15*/ MatGetInfo_SeqBAIJ, 2420 MatEqual_SeqBAIJ, 2421 MatGetDiagonal_SeqBAIJ, 2422 MatDiagonalScale_SeqBAIJ, 2423 MatNorm_SeqBAIJ, 2424 /*20*/ 0, 2425 MatAssemblyEnd_SeqBAIJ, 2426 MatSetOption_SeqBAIJ, 2427 MatZeroEntries_SeqBAIJ, 2428 /*24*/ MatZeroRows_SeqBAIJ, 2429 0, 2430 0, 2431 0, 2432 0, 2433 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2434 0, 2435 0, 2436 MatGetArray_SeqBAIJ, 2437 MatRestoreArray_SeqBAIJ, 2438 /*34*/ MatDuplicate_SeqBAIJ, 2439 0, 2440 0, 2441 MatILUFactor_SeqBAIJ, 2442 0, 2443 /*39*/ MatAXPY_SeqBAIJ, 2444 MatGetSubMatrices_SeqBAIJ, 2445 MatIncreaseOverlap_SeqBAIJ, 2446 MatGetValues_SeqBAIJ, 2447 MatCopy_SeqBAIJ, 2448 /*44*/ 0, 2449 MatScale_SeqBAIJ, 2450 0, 2451 0, 2452 MatILUDTFactor_SeqBAIJ, 2453 /*49*/ 0, 2454 MatGetRowIJ_SeqBAIJ, 2455 MatRestoreRowIJ_SeqBAIJ, 2456 0, 2457 0, 2458 /*54*/ 0, 2459 0, 2460 0, 2461 0, 2462 MatSetValuesBlocked_SeqBAIJ, 2463 /*59*/ MatGetSubMatrix_SeqBAIJ, 2464 MatDestroy_SeqBAIJ, 2465 MatView_SeqBAIJ, 2466 0, 2467 0, 2468 /*64*/ 0, 2469 0, 2470 0, 2471 0, 2472 0, 2473 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2474 0, 2475 MatConvert_Basic, 2476 0, 2477 0, 2478 /*74*/ 0, 2479 0, 2480 0, 2481 0, 2482 0, 2483 /*79*/ 0, 2484 0, 2485 0, 2486 0, 2487 MatLoad_SeqBAIJ, 2488 /*84*/ 0, 2489 0, 2490 0, 2491 0, 2492 0, 2493 /*89*/ 0, 2494 0, 2495 0, 2496 0, 2497 0, 2498 /*94*/ 0, 2499 0, 2500 0, 2501 0, 2502 0, 2503 /*99*/0, 2504 0, 2505 0, 2506 0, 2507 0, 2508 /*104*/0, 2509 MatRealPart_SeqBAIJ, 2510 MatImaginaryPart_SeqBAIJ, 2511 0, 2512 0, 2513 /*109*/0, 2514 0, 2515 0, 2516 0, 2517 MatMissingDiagonal_SeqBAIJ 2518 /*114*/ 2519 }; 2520 2521 EXTERN_C_BEGIN 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2524 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2525 { 2526 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2527 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 if (aij->nonew != 1) { 2532 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2533 } 2534 2535 /* allocate space for values if not already there */ 2536 if (!aij->saved_values) { 2537 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2538 } 2539 2540 /* copy values over */ 2541 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 EXTERN_C_END 2545 2546 EXTERN_C_BEGIN 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2549 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2550 { 2551 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2552 PetscErrorCode ierr; 2553 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2554 2555 PetscFunctionBegin; 2556 if (aij->nonew != 1) { 2557 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2558 } 2559 if (!aij->saved_values) { 2560 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2561 } 2562 2563 /* copy values over */ 2564 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2565 PetscFunctionReturn(0); 2566 } 2567 EXTERN_C_END 2568 2569 EXTERN_C_BEGIN 2570 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2572 EXTERN_C_END 2573 2574 EXTERN_C_BEGIN 2575 #undef __FUNCT__ 2576 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2577 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2578 { 2579 Mat_SeqBAIJ *b; 2580 PetscErrorCode ierr; 2581 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2582 PetscTruth flg,skipallocation = PETSC_FALSE; 2583 2584 PetscFunctionBegin; 2585 2586 if (nz == MAT_SKIP_ALLOCATION) { 2587 skipallocation = PETSC_TRUE; 2588 nz = 0; 2589 } 2590 2591 if (bs < 0) { 2592 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2593 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2594 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2595 bs = PetscAbs(bs); 2596 } 2597 if (nnz && newbs != bs) { 2598 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2599 } 2600 bs = newbs; 2601 2602 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2603 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2604 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2605 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2606 2607 B->preallocated = PETSC_TRUE; 2608 2609 mbs = B->rmap->n/bs; 2610 nbs = B->cmap->n/bs; 2611 bs2 = bs*bs; 2612 2613 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2614 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2615 } 2616 2617 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2618 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2619 if (nnz) { 2620 for (i=0; i<mbs; i++) { 2621 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2622 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2623 } 2624 } 2625 2626 b = (Mat_SeqBAIJ*)B->data; 2627 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2628 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2629 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2630 2631 if (!flg) { 2632 switch (bs) { 2633 case 1: 2634 B->ops->mult = MatMult_SeqBAIJ_1; 2635 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2636 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2637 break; 2638 case 2: 2639 B->ops->mult = MatMult_SeqBAIJ_2; 2640 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2641 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2642 break; 2643 case 3: 2644 B->ops->mult = MatMult_SeqBAIJ_3; 2645 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2646 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2647 break; 2648 case 4: 2649 B->ops->mult = MatMult_SeqBAIJ_4; 2650 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2651 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2652 break; 2653 case 5: 2654 B->ops->mult = MatMult_SeqBAIJ_5; 2655 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2656 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2657 break; 2658 case 6: 2659 B->ops->mult = MatMult_SeqBAIJ_6; 2660 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2661 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2662 break; 2663 case 7: 2664 B->ops->mult = MatMult_SeqBAIJ_7; 2665 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2666 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2667 break; 2668 default: 2669 B->ops->mult = MatMult_SeqBAIJ_N; 2670 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2671 break; 2672 } 2673 } 2674 B->rmap->bs = bs; 2675 b->mbs = mbs; 2676 b->nbs = nbs; 2677 if (!skipallocation) { 2678 if (!b->imax) { 2679 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2680 } 2681 /* b->ilen will count nonzeros in each block row so far. */ 2682 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2683 if (!nnz) { 2684 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2685 else if (nz <= 0) nz = 1; 2686 for (i=0; i<mbs; i++) b->imax[i] = nz; 2687 nz = nz*mbs; 2688 } else { 2689 nz = 0; 2690 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2691 } 2692 2693 /* allocate the matrix space */ 2694 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2695 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2696 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2697 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2698 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2699 b->singlemalloc = PETSC_TRUE; 2700 b->i[0] = 0; 2701 for (i=1; i<mbs+1; i++) { 2702 b->i[i] = b->i[i-1] + b->imax[i-1]; 2703 } 2704 b->free_a = PETSC_TRUE; 2705 b->free_ij = PETSC_TRUE; 2706 } else { 2707 b->free_a = PETSC_FALSE; 2708 b->free_ij = PETSC_FALSE; 2709 } 2710 2711 B->rmap->bs = bs; 2712 b->bs2 = bs2; 2713 b->mbs = mbs; 2714 b->nz = 0; 2715 b->maxnz = nz*bs2; 2716 B->info.nz_unneeded = (PetscReal)b->maxnz; 2717 PetscFunctionReturn(0); 2718 } 2719 EXTERN_C_END 2720 2721 EXTERN_C_BEGIN 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2724 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2725 { 2726 PetscInt i,m,nz,nz_max=0,*nnz; 2727 PetscScalar *values=0; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 2732 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2733 2734 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2735 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2736 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2737 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2738 m = B->rmap->n/bs; 2739 2740 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 2741 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 2742 for(i=0; i<m; i++) { 2743 nz = ii[i+1]- ii[i]; 2744 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 2745 nz_max = PetscMax(nz_max, nz); 2746 nnz[i] = nz; 2747 } 2748 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2749 ierr = PetscFree(nnz);CHKERRQ(ierr); 2750 2751 values = (PetscScalar*)V; 2752 if (!values) { 2753 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2754 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2755 } 2756 for (i=0; i<m; i++) { 2757 PetscInt ncols = ii[i+1] - ii[i]; 2758 const PetscInt *icols = jj + ii[i]; 2759 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2760 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2761 } 2762 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2763 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2764 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2765 2766 PetscFunctionReturn(0); 2767 } 2768 EXTERN_C_END 2769 2770 2771 EXTERN_C_BEGIN 2772 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2773 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 2774 EXTERN_C_END 2775 2776 /*MC 2777 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2778 block sparse compressed row format. 2779 2780 Options Database Keys: 2781 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2782 2783 Level: beginner 2784 2785 .seealso: MatCreateSeqBAIJ() 2786 M*/ 2787 2788 2789 EXTERN_C_BEGIN 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatCreate_SeqBAIJ" 2792 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2793 { 2794 PetscErrorCode ierr; 2795 PetscMPIInt size; 2796 Mat_SeqBAIJ *b; 2797 2798 PetscFunctionBegin; 2799 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2800 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2801 2802 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2803 B->data = (void*)b; 2804 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2805 B->mapping = 0; 2806 b->row = 0; 2807 b->col = 0; 2808 b->icol = 0; 2809 b->reallocs = 0; 2810 b->saved_values = 0; 2811 2812 b->roworiented = PETSC_TRUE; 2813 b->nonew = 0; 2814 b->diag = 0; 2815 b->solve_work = 0; 2816 b->mult_work = 0; 2817 B->spptr = 0; 2818 B->info.nz_unneeded = (PetscReal)b->maxnz; 2819 b->keepnonzeropattern = PETSC_FALSE; 2820 b->xtoy = 0; 2821 b->XtoY = 0; 2822 b->compressedrow.use = PETSC_FALSE; 2823 b->compressedrow.nrows = 0; 2824 b->compressedrow.i = PETSC_NULL; 2825 b->compressedrow.rindex = PETSC_NULL; 2826 b->compressedrow.checked = PETSC_FALSE; 2827 B->same_nonzero = PETSC_FALSE; 2828 2829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 2830 "MatGetFactorAvailable_seqbaij_petsc", 2831 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 2832 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 2833 "MatGetFactor_seqbaij_petsc", 2834 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 2835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2836 "MatInvertBlockDiagonal_SeqBAIJ", 2837 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2838 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2839 "MatStoreValues_SeqBAIJ", 2840 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2842 "MatRetrieveValues_SeqBAIJ", 2843 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2844 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2845 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2846 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2847 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2848 "MatConvert_SeqBAIJ_SeqAIJ", 2849 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2851 "MatConvert_SeqBAIJ_SeqSBAIJ", 2852 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2854 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2855 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 2857 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 2858 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2859 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2860 PetscFunctionReturn(0); 2861 } 2862 EXTERN_C_END 2863 2864 #undef __FUNCT__ 2865 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 2866 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues) 2867 { 2868 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2869 PetscErrorCode ierr; 2870 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2871 2872 PetscFunctionBegin; 2873 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2874 2875 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2876 for (i=0; i<mbs; i++) { 2877 c->imax[i] = a->imax[i]; 2878 c->ilen[i] = a->ilen[i]; 2879 } 2880 2881 /* allocate the matrix space */ 2882 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2883 c->singlemalloc = PETSC_TRUE; 2884 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2885 if (mbs > 0) { 2886 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2887 if (cpvalues == MAT_COPY_VALUES) { 2888 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2889 } else { 2890 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2891 } 2892 } 2893 c->roworiented = a->roworiented; 2894 c->nonew = a->nonew; 2895 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 2896 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 2897 c->bs2 = a->bs2; 2898 c->mbs = a->mbs; 2899 c->nbs = a->nbs; 2900 2901 if (a->diag) { 2902 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2903 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2904 for (i=0; i<mbs; i++) { 2905 c->diag[i] = a->diag[i]; 2906 } 2907 } else c->diag = 0; 2908 c->nz = a->nz; 2909 c->maxnz = a->maxnz; 2910 c->solve_work = 0; 2911 c->mult_work = 0; 2912 c->free_a = PETSC_TRUE; 2913 c->free_ij = PETSC_TRUE; 2914 C->preallocated = PETSC_TRUE; 2915 C->assembled = PETSC_TRUE; 2916 2917 c->compressedrow.use = a->compressedrow.use; 2918 c->compressedrow.nrows = a->compressedrow.nrows; 2919 c->compressedrow.checked = a->compressedrow.checked; 2920 if ( a->compressedrow.checked && a->compressedrow.use){ 2921 i = a->compressedrow.nrows; 2922 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2923 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2924 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2925 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2926 } else { 2927 c->compressedrow.use = PETSC_FALSE; 2928 c->compressedrow.i = PETSC_NULL; 2929 c->compressedrow.rindex = PETSC_NULL; 2930 } 2931 C->same_nonzero = A->same_nonzero; 2932 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2933 PetscFunctionReturn(0); 2934 } 2935 2936 #undef __FUNCT__ 2937 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2938 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2939 { 2940 PetscErrorCode ierr; 2941 2942 PetscFunctionBegin; 2943 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 2944 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 2945 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 2946 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues); 2947 PetscFunctionReturn(0); 2948 } 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatLoad_SeqBAIJ" 2952 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2953 { 2954 Mat_SeqBAIJ *a; 2955 Mat B; 2956 PetscErrorCode ierr; 2957 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2958 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2959 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2960 PetscInt *masked,nmask,tmp,bs2,ishift; 2961 PetscMPIInt size; 2962 int fd; 2963 PetscScalar *aa; 2964 MPI_Comm comm = ((PetscObject)viewer)->comm; 2965 2966 PetscFunctionBegin; 2967 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2968 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2969 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2970 bs2 = bs*bs; 2971 2972 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2973 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2974 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2975 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2976 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2977 M = header[1]; N = header[2]; nz = header[3]; 2978 2979 if (header[3] < 0) { 2980 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2981 } 2982 2983 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2984 2985 /* 2986 This code adds extra rows to make sure the number of rows is 2987 divisible by the blocksize 2988 */ 2989 mbs = M/bs; 2990 extra_rows = bs - M + bs*(mbs); 2991 if (extra_rows == bs) extra_rows = 0; 2992 else mbs++; 2993 if (extra_rows) { 2994 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2995 } 2996 2997 /* read in row lengths */ 2998 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2999 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3000 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3001 3002 /* read in column indices */ 3003 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3004 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3005 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3006 3007 /* loop over row lengths determining block row lengths */ 3008 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3009 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3010 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3011 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3012 masked = mask + mbs; 3013 rowcount = 0; nzcount = 0; 3014 for (i=0; i<mbs; i++) { 3015 nmask = 0; 3016 for (j=0; j<bs; j++) { 3017 kmax = rowlengths[rowcount]; 3018 for (k=0; k<kmax; k++) { 3019 tmp = jj[nzcount++]/bs; 3020 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3021 } 3022 rowcount++; 3023 } 3024 browlengths[i] += nmask; 3025 /* zero out the mask elements we set */ 3026 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3027 } 3028 3029 /* create our matrix */ 3030 ierr = MatCreate(comm,&B); 3031 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3032 ierr = MatSetType(B,type);CHKERRQ(ierr); 3033 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3034 a = (Mat_SeqBAIJ*)B->data; 3035 3036 /* set matrix "i" values */ 3037 a->i[0] = 0; 3038 for (i=1; i<= mbs; i++) { 3039 a->i[i] = a->i[i-1] + browlengths[i-1]; 3040 a->ilen[i-1] = browlengths[i-1]; 3041 } 3042 a->nz = 0; 3043 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3044 3045 /* read in nonzero values */ 3046 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3047 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3048 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3049 3050 /* set "a" and "j" values into matrix */ 3051 nzcount = 0; jcount = 0; 3052 for (i=0; i<mbs; i++) { 3053 nzcountb = nzcount; 3054 nmask = 0; 3055 for (j=0; j<bs; j++) { 3056 kmax = rowlengths[i*bs+j]; 3057 for (k=0; k<kmax; k++) { 3058 tmp = jj[nzcount++]/bs; 3059 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3060 } 3061 } 3062 /* sort the masked values */ 3063 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3064 3065 /* set "j" values into matrix */ 3066 maskcount = 1; 3067 for (j=0; j<nmask; j++) { 3068 a->j[jcount++] = masked[j]; 3069 mask[masked[j]] = maskcount++; 3070 } 3071 /* set "a" values into matrix */ 3072 ishift = bs2*a->i[i]; 3073 for (j=0; j<bs; j++) { 3074 kmax = rowlengths[i*bs+j]; 3075 for (k=0; k<kmax; k++) { 3076 tmp = jj[nzcountb]/bs ; 3077 block = mask[tmp] - 1; 3078 point = jj[nzcountb] - bs*tmp; 3079 idx = ishift + bs2*block + j + bs*point; 3080 a->a[idx] = (MatScalar)aa[nzcountb++]; 3081 } 3082 } 3083 /* zero out the mask elements we set */ 3084 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3085 } 3086 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3087 3088 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3089 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3090 ierr = PetscFree(aa);CHKERRQ(ierr); 3091 ierr = PetscFree(jj);CHKERRQ(ierr); 3092 ierr = PetscFree(mask);CHKERRQ(ierr); 3093 3094 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3096 ierr = MatView_Private(B);CHKERRQ(ierr); 3097 3098 *A = B; 3099 PetscFunctionReturn(0); 3100 } 3101 3102 #undef __FUNCT__ 3103 #define __FUNCT__ "MatCreateSeqBAIJ" 3104 /*@C 3105 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3106 compressed row) format. For good matrix assembly performance the 3107 user should preallocate the matrix storage by setting the parameter nz 3108 (or the array nnz). By setting these parameters accurately, performance 3109 during matrix assembly can be increased by more than a factor of 50. 3110 3111 Collective on MPI_Comm 3112 3113 Input Parameters: 3114 + comm - MPI communicator, set to PETSC_COMM_SELF 3115 . bs - size of block 3116 . m - number of rows 3117 . n - number of columns 3118 . nz - number of nonzero blocks per block row (same for all rows) 3119 - nnz - array containing the number of nonzero blocks in the various block rows 3120 (possibly different for each block row) or PETSC_NULL 3121 3122 Output Parameter: 3123 . A - the matrix 3124 3125 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3126 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3127 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3128 3129 Options Database Keys: 3130 . -mat_no_unroll - uses code that does not unroll the loops in the 3131 block calculations (much slower) 3132 . -mat_block_size - size of the blocks to use 3133 3134 Level: intermediate 3135 3136 Notes: 3137 The number of rows and columns must be divisible by blocksize. 3138 3139 If the nnz parameter is given then the nz parameter is ignored 3140 3141 A nonzero block is any block that as 1 or more nonzeros in it 3142 3143 The block AIJ format is fully compatible with standard Fortran 77 3144 storage. That is, the stored row and column indices can begin at 3145 either one (as in Fortran) or zero. See the users' manual for details. 3146 3147 Specify the preallocated storage with either nz or nnz (not both). 3148 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3149 allocation. For additional details, see the users manual chapter on 3150 matrices. 3151 3152 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3153 @*/ 3154 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3155 { 3156 PetscErrorCode ierr; 3157 3158 PetscFunctionBegin; 3159 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3160 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3161 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3162 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3163 PetscFunctionReturn(0); 3164 } 3165 3166 #undef __FUNCT__ 3167 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3168 /*@C 3169 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3170 per row in the matrix. For good matrix assembly performance the 3171 user should preallocate the matrix storage by setting the parameter nz 3172 (or the array nnz). By setting these parameters accurately, performance 3173 during matrix assembly can be increased by more than a factor of 50. 3174 3175 Collective on MPI_Comm 3176 3177 Input Parameters: 3178 + A - the matrix 3179 . bs - size of block 3180 . nz - number of block nonzeros per block row (same for all rows) 3181 - nnz - array containing the number of block nonzeros in the various block rows 3182 (possibly different for each block row) or PETSC_NULL 3183 3184 Options Database Keys: 3185 . -mat_no_unroll - uses code that does not unroll the loops in the 3186 block calculations (much slower) 3187 . -mat_block_size - size of the blocks to use 3188 3189 Level: intermediate 3190 3191 Notes: 3192 If the nnz parameter is given then the nz parameter is ignored 3193 3194 You can call MatGetInfo() to get information on how effective the preallocation was; 3195 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3196 You can also run with the option -info and look for messages with the string 3197 malloc in them to see if additional memory allocation was needed. 3198 3199 The block AIJ format is fully compatible with standard Fortran 77 3200 storage. That is, the stored row and column indices can begin at 3201 either one (as in Fortran) or zero. See the users' manual for details. 3202 3203 Specify the preallocated storage with either nz or nnz (not both). 3204 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3205 allocation. For additional details, see the users manual chapter on 3206 matrices. 3207 3208 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3209 @*/ 3210 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3211 { 3212 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3213 3214 PetscFunctionBegin; 3215 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3216 if (f) { 3217 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3218 } 3219 PetscFunctionReturn(0); 3220 } 3221 3222 #undef __FUNCT__ 3223 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3224 /*@C 3225 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3226 (the default sequential PETSc format). 3227 3228 Collective on MPI_Comm 3229 3230 Input Parameters: 3231 + A - the matrix 3232 . i - the indices into j for the start of each local row (starts with zero) 3233 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3234 - v - optional values in the matrix 3235 3236 Level: developer 3237 3238 .keywords: matrix, aij, compressed row, sparse 3239 3240 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3241 @*/ 3242 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3243 { 3244 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3245 3246 PetscFunctionBegin; 3247 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3248 if (f) { 3249 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3250 } 3251 PetscFunctionReturn(0); 3252 } 3253 3254 3255 #undef __FUNCT__ 3256 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3257 /*@ 3258 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3259 (upper triangular entries in CSR format) provided by the user. 3260 3261 Collective on MPI_Comm 3262 3263 Input Parameters: 3264 + comm - must be an MPI communicator of size 1 3265 . bs - size of block 3266 . m - number of rows 3267 . n - number of columns 3268 . i - row indices 3269 . j - column indices 3270 - a - matrix values 3271 3272 Output Parameter: 3273 . mat - the matrix 3274 3275 Level: intermediate 3276 3277 Notes: 3278 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3279 once the matrix is destroyed 3280 3281 You cannot set new nonzero locations into this matrix, that will generate an error. 3282 3283 The i and j indices are 0 based 3284 3285 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3286 3287 @*/ 3288 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3289 { 3290 PetscErrorCode ierr; 3291 PetscInt ii; 3292 Mat_SeqBAIJ *baij; 3293 3294 PetscFunctionBegin; 3295 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3296 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3297 3298 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3299 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3300 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3301 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3302 baij = (Mat_SeqBAIJ*)(*mat)->data; 3303 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3304 3305 baij->i = i; 3306 baij->j = j; 3307 baij->a = a; 3308 baij->singlemalloc = PETSC_FALSE; 3309 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3310 baij->free_a = PETSC_FALSE; 3311 baij->free_ij = PETSC_FALSE; 3312 3313 for (ii=0; ii<m; ii++) { 3314 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3315 #if defined(PETSC_USE_DEBUG) 3316 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3317 #endif 3318 } 3319 #if defined(PETSC_USE_DEBUG) 3320 for (ii=0; ii<baij->i[m]; ii++) { 3321 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3322 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3323 } 3324 #endif 3325 3326 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330