1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "../src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "../src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 default: 110 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 111 } 112 a->idiagvalid = PETSC_TRUE; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 120 { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 122 PetscScalar *x,x1,s1; 123 const PetscScalar *b; 124 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 125 PetscErrorCode ierr; 126 PetscInt m = a->mbs,i,i2,nz,idx; 127 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 128 129 PetscFunctionBegin; 130 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 131 its = its*lits; 132 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 133 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 134 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 135 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 136 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 137 138 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 139 140 diag = a->diag; 141 idiag = a->idiag; 142 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 143 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 144 145 if (flag & SOR_ZERO_INITIAL_GUESS) { 146 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 147 x[0] = b[0]*idiag[0]; 148 i2 = 1; 149 idiag += 1; 150 for (i=1; i<m; i++) { 151 v = aa + ai[i]; 152 vi = aj + ai[i]; 153 nz = diag[i] - ai[i]; 154 s1 = b[i2]; 155 while (nz--) { 156 idx = (*vi++); 157 x1 = x[idx]; 158 s1 -= v[0]*x1; 159 v += 1; 160 } 161 x[i2] = idiag[0]*s1; 162 idiag += 1; 163 i2 += 1; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; 174 x[i2] = mdiag[0]*x1; 175 mdiag += 1; 176 i2 += 1; 177 } 178 ierr = PetscLogFlops(m);CHKERRQ(ierr); 179 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 180 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 183 idiag = a->idiag+a->mbs - 1; 184 i2 = m - 1; 185 x1 = x[i2]; 186 x[i2] = idiag[0]*x1; 187 idiag -= 1; 188 i2 -= 1; 189 for (i=m-2; i>=0; i--) { 190 v = aa + (diag[i]+1); 191 vi = aj + diag[i] + 1; 192 nz = ai[i+1] - diag[i] - 1; 193 s1 = x[i2]; 194 while (nz--) { 195 idx = (*vi++); 196 x1 = x[idx]; 197 s1 -= v[0]*x1; 198 v += 1; 199 } 200 x[i2] = idiag[0]*s1; 201 idiag -= 2; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else { 207 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 208 } 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 PetscScalar *x,x1,x2,s1,s2; 220 const PetscScalar *b; 221 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 222 PetscErrorCode ierr; 223 PetscInt m = a->mbs,i,i2,nz,idx; 224 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225 226 PetscFunctionBegin; 227 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 228 its = its*lits; 229 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234 235 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 246 i2 = 2; 247 idiag += 4; 248 for (i=1; i<m; i++) { 249 v = aa + 4*ai[i]; 250 vi = aj + ai[i]; 251 nz = diag[i] - ai[i]; 252 s1 = b[i2]; s2 = b[i2+1]; 253 while (nz--) { 254 idx = 2*(*vi++); 255 x1 = x[idx]; x2 = x[1+idx]; 256 s1 -= v[0]*x1 + v[2]*x2; 257 s2 -= v[1]*x1 + v[3]*x2; 258 v += 4; 259 } 260 x[i2] = idiag[0]*s1 + idiag[2]*s2; 261 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 262 idiag += 4; 263 i2 += 2; 264 } 265 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 266 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 267 } 268 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 269 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 270 i2 = 0; 271 mdiag = a->idiag+4*a->mbs; 272 for (i=0; i<m; i++) { 273 x1 = x[i2]; x2 = x[i2+1]; 274 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 275 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 276 mdiag += 4; 277 i2 += 2; 278 } 279 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 280 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 281 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 282 } 283 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 284 idiag = a->idiag+4*a->mbs - 4; 285 i2 = 2*m - 2; 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = idiag[0]*x1 + idiag[2]*x2; 288 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 289 idiag -= 4; 290 i2 -= 2; 291 for (i=m-2; i>=0; i--) { 292 v = aa + 4*(diag[i]+1); 293 vi = aj + diag[i] + 1; 294 nz = ai[i+1] - diag[i] - 1; 295 s1 = x[i2]; s2 = x[i2+1]; 296 while (nz--) { 297 idx = 2*(*vi++); 298 x1 = x[idx]; x2 = x[1+idx]; 299 s1 -= v[0]*x1 + v[2]*x2; 300 s2 -= v[1]*x1 + v[3]*x2; 301 v += 4; 302 } 303 x[i2] = idiag[0]*s1 + idiag[2]*s2; 304 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 305 idiag -= 4; 306 i2 -= 2; 307 } 308 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 312 } 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 420 } 421 } else { 422 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 423 } 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 435 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 436 const PetscScalar *b; 437 PetscErrorCode ierr; 438 PetscInt m = a->mbs,i,i2,nz,idx; 439 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 440 441 PetscFunctionBegin; 442 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 443 its = its*lits; 444 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 445 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 446 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 447 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 448 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 449 450 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 451 452 diag = a->diag; 453 idiag = a->idiag; 454 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 455 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 456 457 if (flag & SOR_ZERO_INITIAL_GUESS) { 458 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 459 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 460 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 461 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 462 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 463 i2 = 4; 464 idiag += 16; 465 for (i=1; i<m; i++) { 466 v = aa + 16*ai[i]; 467 vi = aj + ai[i]; 468 nz = diag[i] - ai[i]; 469 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 473 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 474 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 475 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 476 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 477 v += 16; 478 } 479 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 480 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 481 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 482 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 483 idiag += 16; 484 i2 += 4; 485 } 486 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 487 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 488 } 489 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 490 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 491 i2 = 0; 492 mdiag = a->idiag+16*a->mbs; 493 for (i=0; i<m; i++) { 494 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 495 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 496 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 497 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 498 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 499 mdiag += 16; 500 i2 += 4; 501 } 502 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 503 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 504 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 505 } 506 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 507 idiag = a->idiag+16*a->mbs - 16; 508 i2 = 4*m - 4; 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 510 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 511 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 512 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 513 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 514 idiag -= 16; 515 i2 -= 4; 516 for (i=m-2; i>=0; i--) { 517 v = aa + 16*(diag[i]+1); 518 vi = aj + diag[i] + 1; 519 nz = ai[i+1] - diag[i] - 1; 520 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 521 while (nz--) { 522 idx = 4*(*vi++); 523 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 524 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 525 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 526 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 527 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 528 v += 16; 529 } 530 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 531 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 532 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 533 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 534 idiag -= 16; 535 i2 -= 4; 536 } 537 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 538 } 539 } else { 540 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 541 } 542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 550 { 551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 552 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 553 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 554 const PetscScalar *b; 555 PetscErrorCode ierr; 556 PetscInt m = a->mbs,i,i2,nz,idx; 557 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 558 559 PetscFunctionBegin; 560 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 561 its = its*lits; 562 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 563 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 564 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 565 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 566 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 567 568 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 569 570 diag = a->diag; 571 idiag = a->idiag; 572 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 574 575 if (flag & SOR_ZERO_INITIAL_GUESS) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 577 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 578 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 579 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 580 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 581 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 582 i2 = 5; 583 idiag += 25; 584 for (i=1; i<m; i++) { 585 v = aa + 25*ai[i]; 586 vi = aj + ai[i]; 587 nz = diag[i] - ai[i]; 588 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 589 while (nz--) { 590 idx = 5*(*vi++); 591 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 592 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 593 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 594 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 595 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 596 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 597 v += 25; 598 } 599 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 600 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 601 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 602 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 603 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 604 idiag += 25; 605 i2 += 5; 606 } 607 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 608 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 609 } 610 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 611 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 612 i2 = 0; 613 mdiag = a->idiag+25*a->mbs; 614 for (i=0; i<m; i++) { 615 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 616 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 617 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 618 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 619 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 620 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 621 mdiag += 25; 622 i2 += 5; 623 } 624 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 625 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 626 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 627 } 628 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 629 idiag = a->idiag+25*a->mbs - 25; 630 i2 = 5*m - 5; 631 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 632 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 633 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 634 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 635 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 636 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 637 idiag -= 25; 638 i2 -= 5; 639 for (i=m-2; i>=0; i--) { 640 v = aa + 25*(diag[i]+1); 641 vi = aj + diag[i] + 1; 642 nz = ai[i+1] - diag[i] - 1; 643 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 644 while (nz--) { 645 idx = 5*(*vi++); 646 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 647 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 648 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 649 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 650 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 651 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 652 v += 25; 653 } 654 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 655 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 656 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 657 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 658 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 659 idiag -= 25; 660 i2 -= 5; 661 } 662 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 663 } 664 } else { 665 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 666 } 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 675 { 676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 677 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 678 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 679 const PetscScalar *b; 680 PetscErrorCode ierr; 681 PetscInt m = a->mbs,i,i2,nz,idx; 682 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 683 684 PetscFunctionBegin; 685 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 686 its = its*lits; 687 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 688 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 689 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 690 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 691 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 692 693 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 694 695 diag = a->diag; 696 idiag = a->idiag; 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 699 700 if (flag & SOR_ZERO_INITIAL_GUESS) { 701 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 702 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 703 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 704 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 705 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 706 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 707 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 708 i2 = 6; 709 idiag += 36; 710 for (i=1; i<m; i++) { 711 v = aa + 36*ai[i]; 712 vi = aj + ai[i]; 713 nz = diag[i] - ai[i]; 714 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 715 while (nz--) { 716 idx = 6*(*vi++); 717 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 718 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 719 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 720 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 721 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 722 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 723 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 724 v += 36; 725 } 726 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 727 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 728 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 729 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 730 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 731 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 732 idiag += 36; 733 i2 += 6; 734 } 735 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 736 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 737 } 738 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 739 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 740 i2 = 0; 741 mdiag = a->idiag+36*a->mbs; 742 for (i=0; i<m; i++) { 743 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 744 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 745 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 746 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 747 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 748 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 749 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 750 mdiag += 36; 751 i2 += 6; 752 } 753 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 754 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 755 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 756 } 757 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 758 idiag = a->idiag+36*a->mbs - 36; 759 i2 = 6*m - 6; 760 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 761 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 762 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 763 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 764 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 765 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 766 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 767 idiag -= 36; 768 i2 -= 6; 769 for (i=m-2; i>=0; i--) { 770 v = aa + 36*(diag[i]+1); 771 vi = aj + diag[i] + 1; 772 nz = ai[i+1] - diag[i] - 1; 773 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 774 while (nz--) { 775 idx = 6*(*vi++); 776 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 777 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 778 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 779 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 780 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 781 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 782 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 783 v += 36; 784 } 785 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 786 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 787 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 788 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 789 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 790 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 791 idiag -= 36; 792 i2 -= 6; 793 } 794 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 795 } 796 } else { 797 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 798 } 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 810 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 811 const PetscScalar *b; 812 PetscErrorCode ierr; 813 PetscInt m = a->mbs,i,i2,nz,idx; 814 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 815 816 PetscFunctionBegin; 817 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 818 its = its*lits; 819 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 820 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 821 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 822 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 823 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 824 825 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 826 827 diag = a->diag; 828 idiag = a->idiag; 829 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 830 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 831 832 if (flag & SOR_ZERO_INITIAL_GUESS) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 834 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 835 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 836 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 837 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 838 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 839 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 840 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 841 i2 = 7; 842 idiag += 49; 843 for (i=1; i<m; i++) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = diag[i] - ai[i]; 847 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 848 while (nz--) { 849 idx = 7*(*vi++); 850 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 851 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 852 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 853 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 854 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 855 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 856 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 857 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 858 v += 49; 859 } 860 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 861 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 862 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 863 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 864 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 865 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 866 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 867 idiag += 49; 868 i2 += 7; 869 } 870 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 871 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 872 } 873 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 874 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 875 i2 = 0; 876 mdiag = a->idiag+49*a->mbs; 877 for (i=0; i<m; i++) { 878 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 879 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 880 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 881 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 882 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 883 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 884 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 885 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 886 mdiag += 36; 887 i2 += 6; 888 } 889 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 890 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 891 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 894 idiag = a->idiag+49*a->mbs - 49; 895 i2 = 7*m - 7; 896 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 897 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 898 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 899 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 900 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 901 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 902 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 903 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 904 idiag -= 49; 905 i2 -= 7; 906 for (i=m-2; i>=0; i--) { 907 v = aa + 49*(diag[i]+1); 908 vi = aj + diag[i] + 1; 909 nz = ai[i+1] - diag[i] - 1; 910 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 911 while (nz--) { 912 idx = 7*(*vi++); 913 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 914 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 915 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 916 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 917 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 918 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 919 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 920 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 921 v += 49; 922 } 923 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 924 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 925 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 926 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 927 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 928 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 929 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 930 idiag -= 49; 931 i2 -= 7; 932 } 933 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 934 } 935 } else { 936 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 937 } 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /* 944 Special version for direct calls from Fortran (Used in PETSc-fun3d) 945 */ 946 #if defined(PETSC_HAVE_FORTRAN_CAPS) 947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 949 #define matsetvaluesblocked4_ matsetvaluesblocked4 950 #endif 951 952 EXTERN_C_BEGIN 953 #undef __FUNCT__ 954 #define __FUNCT__ "matsetvaluesblocked4_" 955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 956 { 957 Mat A = *AA; 958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 959 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 960 PetscInt *ai=a->i,*ailen=a->ilen; 961 PetscInt *aj=a->j,stepval,lastcol = -1; 962 const PetscScalar *value = v; 963 MatScalar *ap,*aa = a->a,*bap; 964 965 PetscFunctionBegin; 966 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 967 stepval = (n-1)*4; 968 for (k=0; k<m; k++) { /* loop over added rows */ 969 row = im[k]; 970 rp = aj + ai[row]; 971 ap = aa + 16*ai[row]; 972 nrow = ailen[row]; 973 low = 0; 974 high = nrow; 975 for (l=0; l<n; l++) { /* loop over added columns */ 976 col = in[l]; 977 if (col <= lastcol) low = 0; else high = nrow; 978 lastcol = col; 979 value = v + k*(stepval+4 + l)*4; 980 while (high-low > 7) { 981 t = (low+high)/2; 982 if (rp[t] > col) high = t; 983 else low = t; 984 } 985 for (i=low; i<high; i++) { 986 if (rp[i] > col) break; 987 if (rp[i] == col) { 988 bap = ap + 16*i; 989 for (ii=0; ii<4; ii++,value+=stepval) { 990 for (jj=ii; jj<16; jj+=4) { 991 bap[jj] += *value++; 992 } 993 } 994 goto noinsert2; 995 } 996 } 997 N = nrow++ - 1; 998 high++; /* added new column index thus must search to one higher than before */ 999 /* shift up all the later entries in this row */ 1000 for (ii=N; ii>=i; ii--) { 1001 rp[ii+1] = rp[ii]; 1002 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1003 } 1004 if (N >= i) { 1005 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1006 } 1007 rp[i] = col; 1008 bap = ap + 16*i; 1009 for (ii=0; ii<4; ii++,value+=stepval) { 1010 for (jj=ii; jj<16; jj+=4) { 1011 bap[jj] = *value++; 1012 } 1013 } 1014 noinsert2:; 1015 low = i; 1016 } 1017 ailen[row] = nrow; 1018 } 1019 PetscFunctionReturnVoid(); 1020 } 1021 EXTERN_C_END 1022 1023 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1024 #define matsetvalues4_ MATSETVALUES4 1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1026 #define matsetvalues4_ matsetvalues4 1027 #endif 1028 1029 EXTERN_C_BEGIN 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSetValues4_" 1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1033 { 1034 Mat A = *AA; 1035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1036 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1037 PetscInt *ai=a->i,*ailen=a->ilen; 1038 PetscInt *aj=a->j,brow,bcol; 1039 PetscInt ridx,cidx,lastcol = -1; 1040 MatScalar *ap,value,*aa=a->a,*bap; 1041 1042 PetscFunctionBegin; 1043 for (k=0; k<m; k++) { /* loop over added rows */ 1044 row = im[k]; brow = row/4; 1045 rp = aj + ai[brow]; 1046 ap = aa + 16*ai[brow]; 1047 nrow = ailen[brow]; 1048 low = 0; 1049 high = nrow; 1050 for (l=0; l<n; l++) { /* loop over added columns */ 1051 col = in[l]; bcol = col/4; 1052 ridx = row % 4; cidx = col % 4; 1053 value = v[l + k*n]; 1054 if (col <= lastcol) low = 0; else high = nrow; 1055 lastcol = col; 1056 while (high-low > 7) { 1057 t = (low+high)/2; 1058 if (rp[t] > bcol) high = t; 1059 else low = t; 1060 } 1061 for (i=low; i<high; i++) { 1062 if (rp[i] > bcol) break; 1063 if (rp[i] == bcol) { 1064 bap = ap + 16*i + 4*cidx + ridx; 1065 *bap += value; 1066 goto noinsert1; 1067 } 1068 } 1069 N = nrow++ - 1; 1070 high++; /* added new column thus must search to one higher than before */ 1071 /* shift up all the later entries in this row */ 1072 for (ii=N; ii>=i; ii--) { 1073 rp[ii+1] = rp[ii]; 1074 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1075 } 1076 if (N>=i) { 1077 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1078 } 1079 rp[i] = bcol; 1080 ap[16*i + 4*cidx + ridx] = value; 1081 noinsert1:; 1082 low = i; 1083 } 1084 ailen[brow] = nrow; 1085 } 1086 PetscFunctionReturnVoid(); 1087 } 1088 EXTERN_C_END 1089 1090 /* 1091 Checks for missing diagonals 1092 */ 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscInt *diag,*jj = a->j,i; 1100 1101 PetscFunctionBegin; 1102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1103 *missing = PETSC_FALSE; 1104 if (A->rmap->n > 0 && !jj) { 1105 *missing = PETSC_TRUE; 1106 if (d) *d = 0; 1107 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1108 } else { 1109 diag = a->diag; 1110 for (i=0; i<a->mbs; i++) { 1111 if (jj[diag[i]] != i) { 1112 *missing = PETSC_TRUE; 1113 if (d) *d = i; 1114 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1115 } 1116 } 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1124 { 1125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1126 PetscErrorCode ierr; 1127 PetscInt i,j,m = a->mbs; 1128 1129 PetscFunctionBegin; 1130 if (!a->diag) { 1131 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1132 } 1133 for (i=0; i<m; i++) { 1134 a->diag[i] = a->i[i+1]; 1135 for (j=a->i[i]; j<a->i[i+1]; j++) { 1136 if (a->j[j] == i) { 1137 a->diag[i] = j; 1138 break; 1139 } 1140 } 1141 } 1142 PetscFunctionReturn(0); 1143 } 1144 1145 1146 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1150 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1151 { 1152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1155 PetscInt *tia, *tja; 1156 1157 PetscFunctionBegin; 1158 *nn = n; 1159 if (!ia) PetscFunctionReturn(0); 1160 if (symmetric) { 1161 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1162 } else { 1163 tia = a->i; tja = a->j; 1164 } 1165 1166 if (!blockcompressed && bs > 1) { 1167 (*nn) *= bs; 1168 nbs = bs; 1169 /* malloc & create the natural set of indices */ 1170 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1171 if (n) { 1172 (*ia)[0] = 0; 1173 for (j=1; j<bs; j++) { 1174 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1175 } 1176 } 1177 1178 for (i=1; i<n; i++) { 1179 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1180 for (j=1; j<bs; j++) { 1181 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1182 } 1183 } 1184 if (n) { 1185 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1186 } 1187 1188 if (ja) { 1189 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1190 cnt = 0; 1191 for (i=0; i<n; i++) { 1192 for (j=0; j<bs; j++) { 1193 for (k=tia[i]; k<tia[i+1]; k++) { 1194 for (l=0; l<bs; l++) { 1195 (*ja)[cnt++] = bs*tja[k] + l; 1196 } 1197 } 1198 } 1199 } 1200 } 1201 1202 n *= bs; 1203 nz *= bs*bs; 1204 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1205 ierr = PetscFree(tia);CHKERRQ(ierr); 1206 ierr = PetscFree(tja);CHKERRQ(ierr); 1207 } 1208 } else { 1209 *ia = tia; 1210 if (ja) *ja = tja; 1211 } 1212 if (oshift == 1) { 1213 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1214 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1221 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1222 { 1223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 PetscInt i,n = a->mbs,nz = a->i[n]; 1226 1227 PetscFunctionBegin; 1228 if (!ia) PetscFunctionReturn(0); 1229 if (!blockcompressed && A->rmap->bs > 1) { 1230 ierr = PetscFree(*ia);CHKERRQ(ierr); 1231 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1232 } else if (symmetric) { 1233 ierr = PetscFree(*ia);CHKERRQ(ierr); 1234 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1235 } else if (oshift == 1) { /* blockcompressed */ 1236 for (i=0; i<n+1; i++) a->i[i]--; 1237 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1244 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 #if defined(PETSC_USE_LOG) 1251 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1252 #endif 1253 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1254 if (a->row) { 1255 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1256 } 1257 if (a->col) { 1258 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1259 } 1260 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1261 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1262 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1263 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1264 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1265 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1266 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1267 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1268 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1269 1270 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1271 ierr = PetscFree(a);CHKERRQ(ierr); 1272 1273 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1287 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1288 { 1289 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1290 PetscErrorCode ierr; 1291 1292 PetscFunctionBegin; 1293 switch (op) { 1294 case MAT_ROW_ORIENTED: 1295 a->roworiented = flg; 1296 break; 1297 case MAT_KEEP_ZEROED_ROWS: 1298 a->keepzeroedrows = flg; 1299 break; 1300 case MAT_NEW_NONZERO_LOCATIONS: 1301 a->nonew = (flg ? 0 : 1); 1302 break; 1303 case MAT_NEW_NONZERO_LOCATION_ERR: 1304 a->nonew = (flg ? -1 : 0); 1305 break; 1306 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1307 a->nonew = (flg ? -2 : 0); 1308 break; 1309 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1310 a->nounused = (flg ? -1 : 0); 1311 break; 1312 case MAT_NEW_DIAGONALS: 1313 case MAT_IGNORE_OFF_PROC_ENTRIES: 1314 case MAT_USE_HASH_TABLE: 1315 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1316 break; 1317 case MAT_SYMMETRIC: 1318 case MAT_STRUCTURALLY_SYMMETRIC: 1319 case MAT_HERMITIAN: 1320 case MAT_SYMMETRY_ETERNAL: 1321 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1322 break; 1323 default: 1324 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1325 } 1326 PetscFunctionReturn(0); 1327 } 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1331 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1332 { 1333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1334 PetscErrorCode ierr; 1335 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1336 MatScalar *aa,*aa_i; 1337 PetscScalar *v_i; 1338 1339 PetscFunctionBegin; 1340 bs = A->rmap->bs; 1341 ai = a->i; 1342 aj = a->j; 1343 aa = a->a; 1344 bs2 = a->bs2; 1345 1346 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1347 1348 bn = row/bs; /* Block number */ 1349 bp = row % bs; /* Block Position */ 1350 M = ai[bn+1] - ai[bn]; 1351 *nz = bs*M; 1352 1353 if (v) { 1354 *v = 0; 1355 if (*nz) { 1356 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1357 for (i=0; i<M; i++) { /* for each block in the block row */ 1358 v_i = *v + i*bs; 1359 aa_i = aa + bs2*(ai[bn] + i); 1360 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1361 } 1362 } 1363 } 1364 1365 if (idx) { 1366 *idx = 0; 1367 if (*nz) { 1368 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1369 for (i=0; i<M; i++) { /* for each block in the block row */ 1370 idx_i = *idx + i*bs; 1371 itmp = bs*aj[ai[bn] + i]; 1372 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1373 } 1374 } 1375 } 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1381 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1382 { 1383 PetscErrorCode ierr; 1384 1385 PetscFunctionBegin; 1386 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1387 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1388 PetscFunctionReturn(0); 1389 } 1390 1391 #undef __FUNCT__ 1392 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1393 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1394 { 1395 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1396 Mat C; 1397 PetscErrorCode ierr; 1398 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1399 PetscInt *rows,*cols,bs2=a->bs2; 1400 MatScalar *array; 1401 1402 PetscFunctionBegin; 1403 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1404 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1405 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1406 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1407 1408 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1409 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1410 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1411 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1412 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1413 ierr = PetscFree(col);CHKERRQ(ierr); 1414 } else { 1415 C = *B; 1416 } 1417 1418 array = a->a; 1419 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1420 cols = rows + bs; 1421 for (i=0; i<mbs; i++) { 1422 cols[0] = i*bs; 1423 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1424 len = ai[i+1] - ai[i]; 1425 for (j=0; j<len; j++) { 1426 rows[0] = (*aj++)*bs; 1427 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1428 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1429 array += bs2; 1430 } 1431 } 1432 ierr = PetscFree(rows);CHKERRQ(ierr); 1433 1434 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 1437 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1438 *B = C; 1439 } else { 1440 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1447 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1448 { 1449 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1450 PetscErrorCode ierr; 1451 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1452 int fd; 1453 PetscScalar *aa; 1454 FILE *file; 1455 1456 PetscFunctionBegin; 1457 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1458 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1459 col_lens[0] = MAT_FILE_COOKIE; 1460 1461 col_lens[1] = A->rmap->N; 1462 col_lens[2] = A->cmap->n; 1463 col_lens[3] = a->nz*bs2; 1464 1465 /* store lengths of each row and write (including header) to file */ 1466 count = 0; 1467 for (i=0; i<a->mbs; i++) { 1468 for (j=0; j<bs; j++) { 1469 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1470 } 1471 } 1472 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1473 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1474 1475 /* store column indices (zero start index) */ 1476 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1477 count = 0; 1478 for (i=0; i<a->mbs; i++) { 1479 for (j=0; j<bs; j++) { 1480 for (k=a->i[i]; k<a->i[i+1]; k++) { 1481 for (l=0; l<bs; l++) { 1482 jj[count++] = bs*a->j[k] + l; 1483 } 1484 } 1485 } 1486 } 1487 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1488 ierr = PetscFree(jj);CHKERRQ(ierr); 1489 1490 /* store nonzero values */ 1491 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1492 count = 0; 1493 for (i=0; i<a->mbs; i++) { 1494 for (j=0; j<bs; j++) { 1495 for (k=a->i[i]; k<a->i[i+1]; k++) { 1496 for (l=0; l<bs; l++) { 1497 aa[count++] = a->a[bs2*k + l*bs + j]; 1498 } 1499 } 1500 } 1501 } 1502 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1503 ierr = PetscFree(aa);CHKERRQ(ierr); 1504 1505 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1506 if (file) { 1507 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1514 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1515 { 1516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1517 PetscErrorCode ierr; 1518 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1519 PetscViewerFormat format; 1520 1521 PetscFunctionBegin; 1522 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1523 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1524 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1525 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1526 Mat aij; 1527 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1528 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1529 ierr = MatDestroy(aij);CHKERRQ(ierr); 1530 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1531 PetscFunctionReturn(0); 1532 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1533 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1534 for (i=0; i<a->mbs; i++) { 1535 for (j=0; j<bs; j++) { 1536 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1537 for (k=a->i[i]; k<a->i[i+1]; k++) { 1538 for (l=0; l<bs; l++) { 1539 #if defined(PETSC_USE_COMPLEX) 1540 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1542 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1545 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1546 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1547 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1548 } 1549 #else 1550 if (a->a[bs2*k + l*bs + j] != 0.0) { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1552 } 1553 #endif 1554 } 1555 } 1556 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1557 } 1558 } 1559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1560 } else { 1561 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1562 for (i=0; i<a->mbs; i++) { 1563 for (j=0; j<bs; j++) { 1564 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1565 for (k=a->i[i]; k<a->i[i+1]; k++) { 1566 for (l=0; l<bs; l++) { 1567 #if defined(PETSC_USE_COMPLEX) 1568 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1570 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1573 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } else { 1575 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1576 } 1577 #else 1578 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1579 #endif 1580 } 1581 } 1582 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1583 } 1584 } 1585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1586 } 1587 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1593 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1594 { 1595 Mat A = (Mat) Aa; 1596 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1597 PetscErrorCode ierr; 1598 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1599 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1600 MatScalar *aa; 1601 PetscViewer viewer; 1602 1603 PetscFunctionBegin; 1604 1605 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1606 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1607 1608 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1609 1610 /* loop over matrix elements drawing boxes */ 1611 color = PETSC_DRAW_BLUE; 1612 for (i=0,row=0; i<mbs; i++,row+=bs) { 1613 for (j=a->i[i]; j<a->i[i+1]; j++) { 1614 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1615 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1616 aa = a->a + j*bs2; 1617 for (k=0; k<bs; k++) { 1618 for (l=0; l<bs; l++) { 1619 if (PetscRealPart(*aa++) >= 0.) continue; 1620 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1621 } 1622 } 1623 } 1624 } 1625 color = PETSC_DRAW_CYAN; 1626 for (i=0,row=0; i<mbs; i++,row+=bs) { 1627 for (j=a->i[i]; j<a->i[i+1]; j++) { 1628 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1629 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1630 aa = a->a + j*bs2; 1631 for (k=0; k<bs; k++) { 1632 for (l=0; l<bs; l++) { 1633 if (PetscRealPart(*aa++) != 0.) continue; 1634 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1635 } 1636 } 1637 } 1638 } 1639 1640 color = PETSC_DRAW_RED; 1641 for (i=0,row=0; i<mbs; i++,row+=bs) { 1642 for (j=a->i[i]; j<a->i[i+1]; j++) { 1643 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1644 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1645 aa = a->a + j*bs2; 1646 for (k=0; k<bs; k++) { 1647 for (l=0; l<bs; l++) { 1648 if (PetscRealPart(*aa++) <= 0.) continue; 1649 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1650 } 1651 } 1652 } 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1659 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1660 { 1661 PetscErrorCode ierr; 1662 PetscReal xl,yl,xr,yr,w,h; 1663 PetscDraw draw; 1664 PetscTruth isnull; 1665 1666 PetscFunctionBegin; 1667 1668 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1669 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1670 1671 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1672 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1673 xr += w; yr += h; xl = -w; yl = -h; 1674 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1675 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1676 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatView_SeqBAIJ" 1682 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1683 { 1684 PetscErrorCode ierr; 1685 PetscTruth iascii,isbinary,isdraw; 1686 1687 PetscFunctionBegin; 1688 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1689 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1690 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1691 if (iascii){ 1692 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1693 } else if (isbinary) { 1694 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1695 } else if (isdraw) { 1696 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1697 } else { 1698 Mat B; 1699 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1700 ierr = MatView(B,viewer);CHKERRQ(ierr); 1701 ierr = MatDestroy(B);CHKERRQ(ierr); 1702 } 1703 PetscFunctionReturn(0); 1704 } 1705 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1709 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1710 { 1711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1712 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1713 PetscInt *ai = a->i,*ailen = a->ilen; 1714 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1715 MatScalar *ap,*aa = a->a; 1716 1717 PetscFunctionBegin; 1718 for (k=0; k<m; k++) { /* loop over rows */ 1719 row = im[k]; brow = row/bs; 1720 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1721 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1722 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1723 nrow = ailen[brow]; 1724 for (l=0; l<n; l++) { /* loop over columns */ 1725 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1726 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1727 col = in[l] ; 1728 bcol = col/bs; 1729 cidx = col%bs; 1730 ridx = row%bs; 1731 high = nrow; 1732 low = 0; /* assume unsorted */ 1733 while (high-low > 5) { 1734 t = (low+high)/2; 1735 if (rp[t] > bcol) high = t; 1736 else low = t; 1737 } 1738 for (i=low; i<high; i++) { 1739 if (rp[i] > bcol) break; 1740 if (rp[i] == bcol) { 1741 *v++ = ap[bs2*i+bs*cidx+ridx]; 1742 goto finished; 1743 } 1744 } 1745 *v++ = 0.0; 1746 finished:; 1747 } 1748 } 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #define CHUNKSIZE 10 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1755 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1756 { 1757 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1758 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1759 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1760 PetscErrorCode ierr; 1761 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1762 PetscTruth roworiented=a->roworiented; 1763 const PetscScalar *value = v; 1764 MatScalar *ap,*aa = a->a,*bap; 1765 1766 PetscFunctionBegin; 1767 if (roworiented) { 1768 stepval = (n-1)*bs; 1769 } else { 1770 stepval = (m-1)*bs; 1771 } 1772 for (k=0; k<m; k++) { /* loop over added rows */ 1773 row = im[k]; 1774 if (row < 0) continue; 1775 #if defined(PETSC_USE_DEBUG) 1776 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1777 #endif 1778 rp = aj + ai[row]; 1779 ap = aa + bs2*ai[row]; 1780 rmax = imax[row]; 1781 nrow = ailen[row]; 1782 low = 0; 1783 high = nrow; 1784 for (l=0; l<n; l++) { /* loop over added columns */ 1785 if (in[l] < 0) continue; 1786 #if defined(PETSC_USE_DEBUG) 1787 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1788 #endif 1789 col = in[l]; 1790 if (roworiented) { 1791 value = v + k*(stepval+bs)*bs + l*bs; 1792 } else { 1793 value = v + l*(stepval+bs)*bs + k*bs; 1794 } 1795 if (col <= lastcol) low = 0; else high = nrow; 1796 lastcol = col; 1797 while (high-low > 7) { 1798 t = (low+high)/2; 1799 if (rp[t] > col) high = t; 1800 else low = t; 1801 } 1802 for (i=low; i<high; i++) { 1803 if (rp[i] > col) break; 1804 if (rp[i] == col) { 1805 bap = ap + bs2*i; 1806 if (roworiented) { 1807 if (is == ADD_VALUES) { 1808 for (ii=0; ii<bs; ii++,value+=stepval) { 1809 for (jj=ii; jj<bs2; jj+=bs) { 1810 bap[jj] += *value++; 1811 } 1812 } 1813 } else { 1814 for (ii=0; ii<bs; ii++,value+=stepval) { 1815 for (jj=ii; jj<bs2; jj+=bs) { 1816 bap[jj] = *value++; 1817 } 1818 } 1819 } 1820 } else { 1821 if (is == ADD_VALUES) { 1822 for (ii=0; ii<bs; ii++,value+=stepval) { 1823 for (jj=0; jj<bs; jj++) { 1824 *bap++ += *value++; 1825 } 1826 } 1827 } else { 1828 for (ii=0; ii<bs; ii++,value+=stepval) { 1829 for (jj=0; jj<bs; jj++) { 1830 *bap++ = *value++; 1831 } 1832 } 1833 } 1834 } 1835 goto noinsert2; 1836 } 1837 } 1838 if (nonew == 1) goto noinsert2; 1839 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1840 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1841 N = nrow++ - 1; high++; 1842 /* shift up all the later entries in this row */ 1843 for (ii=N; ii>=i; ii--) { 1844 rp[ii+1] = rp[ii]; 1845 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1846 } 1847 if (N >= i) { 1848 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1849 } 1850 rp[i] = col; 1851 bap = ap + bs2*i; 1852 if (roworiented) { 1853 for (ii=0; ii<bs; ii++,value+=stepval) { 1854 for (jj=ii; jj<bs2; jj+=bs) { 1855 bap[jj] = *value++; 1856 } 1857 } 1858 } else { 1859 for (ii=0; ii<bs; ii++,value+=stepval) { 1860 for (jj=0; jj<bs; jj++) { 1861 *bap++ = *value++; 1862 } 1863 } 1864 } 1865 noinsert2:; 1866 low = i; 1867 } 1868 ailen[row] = nrow; 1869 } 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1875 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1876 { 1877 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1878 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1879 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1880 PetscErrorCode ierr; 1881 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1882 MatScalar *aa = a->a,*ap; 1883 PetscReal ratio=0.6; 1884 1885 PetscFunctionBegin; 1886 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1887 1888 if (m) rmax = ailen[0]; 1889 for (i=1; i<mbs; i++) { 1890 /* move each row back by the amount of empty slots (fshift) before it*/ 1891 fshift += imax[i-1] - ailen[i-1]; 1892 rmax = PetscMax(rmax,ailen[i]); 1893 if (fshift) { 1894 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1895 N = ailen[i]; 1896 for (j=0; j<N; j++) { 1897 ip[j-fshift] = ip[j]; 1898 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1899 } 1900 } 1901 ai[i] = ai[i-1] + ailen[i-1]; 1902 } 1903 if (mbs) { 1904 fshift += imax[mbs-1] - ailen[mbs-1]; 1905 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1906 } 1907 /* reset ilen and imax for each row */ 1908 for (i=0; i<mbs; i++) { 1909 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1910 } 1911 a->nz = ai[mbs]; 1912 1913 /* diagonals may have moved, so kill the diagonal pointers */ 1914 a->idiagvalid = PETSC_FALSE; 1915 if (fshift && a->diag) { 1916 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1917 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1918 a->diag = 0; 1919 } 1920 if (fshift && a->nounused == -1) { 1921 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1922 } 1923 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1924 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1925 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1926 a->reallocs = 0; 1927 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1928 1929 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1930 if (a->compressedrow.use){ 1931 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1932 } 1933 1934 A->same_nonzero = PETSC_TRUE; 1935 PetscFunctionReturn(0); 1936 } 1937 1938 /* 1939 This function returns an array of flags which indicate the locations of contiguous 1940 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1941 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1942 Assume: sizes should be long enough to hold all the values. 1943 */ 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1946 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1947 { 1948 PetscInt i,j,k,row; 1949 PetscTruth flg; 1950 1951 PetscFunctionBegin; 1952 for (i=0,j=0; i<n; j++) { 1953 row = idx[i]; 1954 if (row%bs!=0) { /* Not the begining of a block */ 1955 sizes[j] = 1; 1956 i++; 1957 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1958 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1959 i++; 1960 } else { /* Begining of the block, so check if the complete block exists */ 1961 flg = PETSC_TRUE; 1962 for (k=1; k<bs; k++) { 1963 if (row+k != idx[i+k]) { /* break in the block */ 1964 flg = PETSC_FALSE; 1965 break; 1966 } 1967 } 1968 if (flg) { /* No break in the bs */ 1969 sizes[j] = bs; 1970 i+= bs; 1971 } else { 1972 sizes[j] = 1; 1973 i++; 1974 } 1975 } 1976 } 1977 *bs_max = j; 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1983 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1984 { 1985 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1986 PetscErrorCode ierr; 1987 PetscInt i,j,k,count,*rows; 1988 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1989 PetscScalar zero = 0.0; 1990 MatScalar *aa; 1991 1992 PetscFunctionBegin; 1993 /* Make a copy of the IS and sort it */ 1994 /* allocate memory for rows,sizes */ 1995 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1996 sizes = rows + is_n; 1997 1998 /* copy IS values to rows, and sort them */ 1999 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2000 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2001 if (baij->keepzeroedrows) { 2002 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2003 bs_max = is_n; 2004 A->same_nonzero = PETSC_TRUE; 2005 } else { 2006 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2007 A->same_nonzero = PETSC_FALSE; 2008 } 2009 2010 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2011 row = rows[j]; 2012 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2013 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2014 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2015 if (sizes[i] == bs && !baij->keepzeroedrows) { 2016 if (diag != 0.0) { 2017 if (baij->ilen[row/bs] > 0) { 2018 baij->ilen[row/bs] = 1; 2019 baij->j[baij->i[row/bs]] = row/bs; 2020 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2021 } 2022 /* Now insert all the diagonal values for this bs */ 2023 for (k=0; k<bs; k++) { 2024 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2025 } 2026 } else { /* (diag == 0.0) */ 2027 baij->ilen[row/bs] = 0; 2028 } /* end (diag == 0.0) */ 2029 } else { /* (sizes[i] != bs) */ 2030 #if defined (PETSC_USE_DEBUG) 2031 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2032 #endif 2033 for (k=0; k<count; k++) { 2034 aa[0] = zero; 2035 aa += bs; 2036 } 2037 if (diag != 0.0) { 2038 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2039 } 2040 } 2041 } 2042 2043 ierr = PetscFree(rows);CHKERRQ(ierr); 2044 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2050 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2051 { 2052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2053 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2054 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2055 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2056 PetscErrorCode ierr; 2057 PetscInt ridx,cidx,bs2=a->bs2; 2058 PetscTruth roworiented=a->roworiented; 2059 MatScalar *ap,value,*aa=a->a,*bap; 2060 2061 PetscFunctionBegin; 2062 for (k=0; k<m; k++) { /* loop over added rows */ 2063 row = im[k]; 2064 brow = row/bs; 2065 if (row < 0) continue; 2066 #if defined(PETSC_USE_DEBUG) 2067 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2068 #endif 2069 rp = aj + ai[brow]; 2070 ap = aa + bs2*ai[brow]; 2071 rmax = imax[brow]; 2072 nrow = ailen[brow]; 2073 low = 0; 2074 high = nrow; 2075 for (l=0; l<n; l++) { /* loop over added columns */ 2076 if (in[l] < 0) continue; 2077 #if defined(PETSC_USE_DEBUG) 2078 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2079 #endif 2080 col = in[l]; bcol = col/bs; 2081 ridx = row % bs; cidx = col % bs; 2082 if (roworiented) { 2083 value = v[l + k*n]; 2084 } else { 2085 value = v[k + l*m]; 2086 } 2087 if (col <= lastcol) low = 0; else high = nrow; 2088 lastcol = col; 2089 while (high-low > 7) { 2090 t = (low+high)/2; 2091 if (rp[t] > bcol) high = t; 2092 else low = t; 2093 } 2094 for (i=low; i<high; i++) { 2095 if (rp[i] > bcol) break; 2096 if (rp[i] == bcol) { 2097 bap = ap + bs2*i + bs*cidx + ridx; 2098 if (is == ADD_VALUES) *bap += value; 2099 else *bap = value; 2100 goto noinsert1; 2101 } 2102 } 2103 if (nonew == 1) goto noinsert1; 2104 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2105 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2106 N = nrow++ - 1; high++; 2107 /* shift up all the later entries in this row */ 2108 for (ii=N; ii>=i; ii--) { 2109 rp[ii+1] = rp[ii]; 2110 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2111 } 2112 if (N>=i) { 2113 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2114 } 2115 rp[i] = bcol; 2116 ap[bs2*i + bs*cidx + ridx] = value; 2117 a->nz++; 2118 noinsert1:; 2119 low = i; 2120 } 2121 ailen[brow] = nrow; 2122 } 2123 A->same_nonzero = PETSC_FALSE; 2124 PetscFunctionReturn(0); 2125 } 2126 2127 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2128 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2131 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2132 { 2133 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2134 Mat outA; 2135 PetscErrorCode ierr; 2136 PetscTruth row_identity,col_identity; 2137 2138 PetscFunctionBegin; 2139 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2140 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2141 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2142 if (!row_identity || !col_identity) { 2143 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2144 } 2145 2146 outA = inA; 2147 inA->factor = MAT_FACTOR_LU; 2148 2149 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2150 2151 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2152 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2153 a->row = row; 2154 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2155 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2156 a->col = col; 2157 2158 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2159 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2160 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2161 2162 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2163 if (!a->solve_work) { 2164 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2165 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2166 } 2167 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2168 2169 PetscFunctionReturn(0); 2170 } 2171 2172 EXTERN_C_BEGIN 2173 #undef __FUNCT__ 2174 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2175 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2176 { 2177 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2178 PetscInt i,nz,mbs; 2179 2180 PetscFunctionBegin; 2181 nz = baij->maxnz/baij->bs2; 2182 mbs = baij->mbs; 2183 for (i=0; i<nz; i++) { 2184 baij->j[i] = indices[i]; 2185 } 2186 baij->nz = nz; 2187 for (i=0; i<mbs; i++) { 2188 baij->ilen[i] = baij->imax[i]; 2189 } 2190 PetscFunctionReturn(0); 2191 } 2192 EXTERN_C_END 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2196 /*@ 2197 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2198 in the matrix. 2199 2200 Input Parameters: 2201 + mat - the SeqBAIJ matrix 2202 - indices - the column indices 2203 2204 Level: advanced 2205 2206 Notes: 2207 This can be called if you have precomputed the nonzero structure of the 2208 matrix and want to provide it to the matrix object to improve the performance 2209 of the MatSetValues() operation. 2210 2211 You MUST have set the correct numbers of nonzeros per row in the call to 2212 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2213 2214 MUST be called before any calls to MatSetValues(); 2215 2216 @*/ 2217 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2218 { 2219 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2220 2221 PetscFunctionBegin; 2222 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2223 PetscValidPointer(indices,2); 2224 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2225 if (f) { 2226 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2227 } else { 2228 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2229 } 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2235 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2236 { 2237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2238 PetscErrorCode ierr; 2239 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2240 PetscReal atmp; 2241 PetscScalar *x,zero = 0.0; 2242 MatScalar *aa; 2243 PetscInt ncols,brow,krow,kcol; 2244 2245 PetscFunctionBegin; 2246 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2247 bs = A->rmap->bs; 2248 aa = a->a; 2249 ai = a->i; 2250 aj = a->j; 2251 mbs = a->mbs; 2252 2253 ierr = VecSet(v,zero);CHKERRQ(ierr); 2254 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2255 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2256 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2257 for (i=0; i<mbs; i++) { 2258 ncols = ai[1] - ai[0]; ai++; 2259 brow = bs*i; 2260 for (j=0; j<ncols; j++){ 2261 for (kcol=0; kcol<bs; kcol++){ 2262 for (krow=0; krow<bs; krow++){ 2263 atmp = PetscAbsScalar(*aa);aa++; 2264 row = brow + krow; /* row index */ 2265 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2266 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2267 } 2268 } 2269 aj++; 2270 } 2271 } 2272 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2273 PetscFunctionReturn(0); 2274 } 2275 2276 #undef __FUNCT__ 2277 #define __FUNCT__ "MatCopy_SeqBAIJ" 2278 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2279 { 2280 PetscErrorCode ierr; 2281 2282 PetscFunctionBegin; 2283 /* If the two matrices have the same copy implementation, use fast copy. */ 2284 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2285 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2286 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2287 2288 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2289 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2290 } 2291 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2292 } else { 2293 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2294 } 2295 PetscFunctionReturn(0); 2296 } 2297 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2300 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2301 { 2302 PetscErrorCode ierr; 2303 2304 PetscFunctionBegin; 2305 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 2309 #undef __FUNCT__ 2310 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2311 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2312 { 2313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2314 PetscFunctionBegin; 2315 *array = a->a; 2316 PetscFunctionReturn(0); 2317 } 2318 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2321 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2322 { 2323 PetscFunctionBegin; 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #include "petscblaslapack.h" 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2330 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2331 { 2332 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2333 PetscErrorCode ierr; 2334 PetscInt i,bs=Y->rmap->bs,j,bs2; 2335 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2336 2337 PetscFunctionBegin; 2338 if (str == SAME_NONZERO_PATTERN) { 2339 PetscScalar alpha = a; 2340 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2341 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2342 if (y->xtoy && y->XtoY != X) { 2343 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2344 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2345 } 2346 if (!y->xtoy) { /* get xtoy */ 2347 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2348 y->XtoY = X; 2349 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2350 } 2351 bs2 = bs*bs; 2352 for (i=0; i<x->nz; i++) { 2353 j = 0; 2354 while (j < bs2){ 2355 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2356 j++; 2357 } 2358 } 2359 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2360 } else { 2361 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2362 } 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2368 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2369 { 2370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2371 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2372 MatScalar *aa = a->a; 2373 2374 PetscFunctionBegin; 2375 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2381 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2382 { 2383 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2384 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2385 MatScalar *aa = a->a; 2386 2387 PetscFunctionBegin; 2388 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 2393 /* -------------------------------------------------------------------*/ 2394 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2395 MatGetRow_SeqBAIJ, 2396 MatRestoreRow_SeqBAIJ, 2397 MatMult_SeqBAIJ_N, 2398 /* 4*/ MatMultAdd_SeqBAIJ_N, 2399 MatMultTranspose_SeqBAIJ, 2400 MatMultTransposeAdd_SeqBAIJ, 2401 0, 2402 0, 2403 0, 2404 /*10*/ 0, 2405 MatLUFactor_SeqBAIJ, 2406 0, 2407 0, 2408 MatTranspose_SeqBAIJ, 2409 /*15*/ MatGetInfo_SeqBAIJ, 2410 MatEqual_SeqBAIJ, 2411 MatGetDiagonal_SeqBAIJ, 2412 MatDiagonalScale_SeqBAIJ, 2413 MatNorm_SeqBAIJ, 2414 /*20*/ 0, 2415 MatAssemblyEnd_SeqBAIJ, 2416 MatSetOption_SeqBAIJ, 2417 MatZeroEntries_SeqBAIJ, 2418 /*24*/ MatZeroRows_SeqBAIJ, 2419 0, 2420 0, 2421 0, 2422 0, 2423 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2424 0, 2425 0, 2426 MatGetArray_SeqBAIJ, 2427 MatRestoreArray_SeqBAIJ, 2428 /*34*/ MatDuplicate_SeqBAIJ, 2429 0, 2430 0, 2431 MatILUFactor_SeqBAIJ, 2432 0, 2433 /*39*/ MatAXPY_SeqBAIJ, 2434 MatGetSubMatrices_SeqBAIJ, 2435 MatIncreaseOverlap_SeqBAIJ, 2436 MatGetValues_SeqBAIJ, 2437 MatCopy_SeqBAIJ, 2438 /*44*/ 0, 2439 MatScale_SeqBAIJ, 2440 0, 2441 0, 2442 MatILUDTFactor_SeqBAIJ, 2443 /*49*/ 0, 2444 MatGetRowIJ_SeqBAIJ, 2445 MatRestoreRowIJ_SeqBAIJ, 2446 0, 2447 0, 2448 /*54*/ 0, 2449 0, 2450 0, 2451 0, 2452 MatSetValuesBlocked_SeqBAIJ, 2453 /*59*/ MatGetSubMatrix_SeqBAIJ, 2454 MatDestroy_SeqBAIJ, 2455 MatView_SeqBAIJ, 2456 0, 2457 0, 2458 /*64*/ 0, 2459 0, 2460 0, 2461 0, 2462 0, 2463 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2464 0, 2465 MatConvert_Basic, 2466 0, 2467 0, 2468 /*74*/ 0, 2469 0, 2470 0, 2471 0, 2472 0, 2473 /*79*/ 0, 2474 0, 2475 0, 2476 0, 2477 MatLoad_SeqBAIJ, 2478 /*84*/ 0, 2479 0, 2480 0, 2481 0, 2482 0, 2483 /*89*/ 0, 2484 0, 2485 0, 2486 0, 2487 0, 2488 /*94*/ 0, 2489 0, 2490 0, 2491 0, 2492 0, 2493 /*99*/0, 2494 0, 2495 0, 2496 0, 2497 0, 2498 /*104*/0, 2499 MatRealPart_SeqBAIJ, 2500 MatImaginaryPart_SeqBAIJ, 2501 0, 2502 0, 2503 /*109*/0, 2504 0, 2505 0, 2506 0, 2507 MatMissingDiagonal_SeqBAIJ 2508 /*114*/ 2509 }; 2510 2511 EXTERN_C_BEGIN 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2514 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2515 { 2516 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2517 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2518 PetscErrorCode ierr; 2519 2520 PetscFunctionBegin; 2521 if (aij->nonew != 1) { 2522 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2523 } 2524 2525 /* allocate space for values if not already there */ 2526 if (!aij->saved_values) { 2527 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2528 } 2529 2530 /* copy values over */ 2531 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 EXTERN_C_END 2535 2536 EXTERN_C_BEGIN 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2539 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2540 { 2541 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2542 PetscErrorCode ierr; 2543 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2544 2545 PetscFunctionBegin; 2546 if (aij->nonew != 1) { 2547 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2548 } 2549 if (!aij->saved_values) { 2550 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2551 } 2552 2553 /* copy values over */ 2554 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2555 PetscFunctionReturn(0); 2556 } 2557 EXTERN_C_END 2558 2559 EXTERN_C_BEGIN 2560 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2561 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2562 EXTERN_C_END 2563 2564 EXTERN_C_BEGIN 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2567 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2568 { 2569 Mat_SeqBAIJ *b; 2570 PetscErrorCode ierr; 2571 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2572 PetscTruth flg,skipallocation = PETSC_FALSE; 2573 2574 PetscFunctionBegin; 2575 2576 if (nz == MAT_SKIP_ALLOCATION) { 2577 skipallocation = PETSC_TRUE; 2578 nz = 0; 2579 } 2580 2581 if (bs < 0) { 2582 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2583 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2584 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2585 bs = PetscAbs(bs); 2586 } 2587 if (nnz && newbs != bs) { 2588 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2589 } 2590 bs = newbs; 2591 2592 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2593 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2594 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2595 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2596 2597 B->preallocated = PETSC_TRUE; 2598 2599 mbs = B->rmap->n/bs; 2600 nbs = B->cmap->n/bs; 2601 bs2 = bs*bs; 2602 2603 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2604 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2605 } 2606 2607 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2608 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2609 if (nnz) { 2610 for (i=0; i<mbs; i++) { 2611 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2612 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2613 } 2614 } 2615 2616 b = (Mat_SeqBAIJ*)B->data; 2617 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2618 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2619 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2620 2621 if (!flg) { 2622 switch (bs) { 2623 case 1: 2624 B->ops->mult = MatMult_SeqBAIJ_1; 2625 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2626 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2627 break; 2628 case 2: 2629 B->ops->mult = MatMult_SeqBAIJ_2; 2630 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2631 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2632 break; 2633 case 3: 2634 B->ops->mult = MatMult_SeqBAIJ_3; 2635 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2636 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2637 break; 2638 case 4: 2639 B->ops->mult = MatMult_SeqBAIJ_4; 2640 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2641 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2642 break; 2643 case 5: 2644 B->ops->mult = MatMult_SeqBAIJ_5; 2645 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2646 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2647 break; 2648 case 6: 2649 B->ops->mult = MatMult_SeqBAIJ_6; 2650 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2651 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2652 break; 2653 case 7: 2654 B->ops->mult = MatMult_SeqBAIJ_7; 2655 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2656 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2657 break; 2658 default: 2659 B->ops->mult = MatMult_SeqBAIJ_N; 2660 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2661 break; 2662 } 2663 } 2664 B->rmap->bs = bs; 2665 b->mbs = mbs; 2666 b->nbs = nbs; 2667 if (!skipallocation) { 2668 if (!b->imax) { 2669 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2670 } 2671 /* b->ilen will count nonzeros in each block row so far. */ 2672 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2673 if (!nnz) { 2674 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2675 else if (nz <= 0) nz = 1; 2676 for (i=0; i<mbs; i++) b->imax[i] = nz; 2677 nz = nz*mbs; 2678 } else { 2679 nz = 0; 2680 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2681 } 2682 2683 /* allocate the matrix space */ 2684 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2685 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2686 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2687 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2688 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2689 b->singlemalloc = PETSC_TRUE; 2690 b->i[0] = 0; 2691 for (i=1; i<mbs+1; i++) { 2692 b->i[i] = b->i[i-1] + b->imax[i-1]; 2693 } 2694 b->free_a = PETSC_TRUE; 2695 b->free_ij = PETSC_TRUE; 2696 } else { 2697 b->free_a = PETSC_FALSE; 2698 b->free_ij = PETSC_FALSE; 2699 } 2700 2701 B->rmap->bs = bs; 2702 b->bs2 = bs2; 2703 b->mbs = mbs; 2704 b->nz = 0; 2705 b->maxnz = nz*bs2; 2706 B->info.nz_unneeded = (PetscReal)b->maxnz; 2707 PetscFunctionReturn(0); 2708 } 2709 EXTERN_C_END 2710 2711 EXTERN_C_BEGIN 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2714 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2715 { 2716 PetscInt i,m,nz,nz_max=0,*nnz; 2717 PetscScalar *values=0; 2718 PetscErrorCode ierr; 2719 2720 PetscFunctionBegin; 2721 2722 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2723 2724 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2725 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2726 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2727 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2728 m = B->rmap->n/bs; 2729 2730 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 2731 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 2732 for(i=0; i<m; i++) { 2733 nz = ii[i+1]- ii[i]; 2734 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 2735 nz_max = PetscMax(nz_max, nz); 2736 nnz[i] = nz; 2737 } 2738 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2739 ierr = PetscFree(nnz);CHKERRQ(ierr); 2740 2741 values = (PetscScalar*)V; 2742 if (!values) { 2743 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2744 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2745 } 2746 for (i=0; i<m; i++) { 2747 PetscInt ncols = ii[i+1] - ii[i]; 2748 const PetscInt *icols = jj + ii[i]; 2749 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2750 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2751 } 2752 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2753 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 2756 PetscFunctionReturn(0); 2757 } 2758 EXTERN_C_END 2759 2760 2761 EXTERN_C_BEGIN 2762 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2763 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 2764 EXTERN_C_END 2765 2766 /*MC 2767 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2768 block sparse compressed row format. 2769 2770 Options Database Keys: 2771 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2772 2773 Level: beginner 2774 2775 .seealso: MatCreateSeqBAIJ() 2776 M*/ 2777 2778 2779 EXTERN_C_BEGIN 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatCreate_SeqBAIJ" 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2783 { 2784 PetscErrorCode ierr; 2785 PetscMPIInt size; 2786 Mat_SeqBAIJ *b; 2787 2788 PetscFunctionBegin; 2789 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2790 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2791 2792 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2793 B->data = (void*)b; 2794 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2795 B->mapping = 0; 2796 b->row = 0; 2797 b->col = 0; 2798 b->icol = 0; 2799 b->reallocs = 0; 2800 b->saved_values = 0; 2801 2802 b->roworiented = PETSC_TRUE; 2803 b->nonew = 0; 2804 b->diag = 0; 2805 b->solve_work = 0; 2806 b->mult_work = 0; 2807 B->spptr = 0; 2808 B->info.nz_unneeded = (PetscReal)b->maxnz; 2809 b->keepzeroedrows = PETSC_FALSE; 2810 b->xtoy = 0; 2811 b->XtoY = 0; 2812 b->compressedrow.use = PETSC_FALSE; 2813 b->compressedrow.nrows = 0; 2814 b->compressedrow.i = PETSC_NULL; 2815 b->compressedrow.rindex = PETSC_NULL; 2816 b->compressedrow.checked = PETSC_FALSE; 2817 B->same_nonzero = PETSC_FALSE; 2818 2819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 2820 "MatGetFactorAvailable_seqbaij_petsc", 2821 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 2822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 2823 "MatGetFactor_seqbaij_petsc", 2824 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 2825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2826 "MatInvertBlockDiagonal_SeqBAIJ", 2827 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2829 "MatStoreValues_SeqBAIJ", 2830 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2832 "MatRetrieveValues_SeqBAIJ", 2833 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2835 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2836 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2838 "MatConvert_SeqBAIJ_SeqAIJ", 2839 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2841 "MatConvert_SeqBAIJ_SeqSBAIJ", 2842 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2844 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2845 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 2847 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 2848 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2849 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2850 PetscFunctionReturn(0); 2851 } 2852 EXTERN_C_END 2853 2854 #undef __FUNCT__ 2855 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 2856 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues) 2857 { 2858 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2859 PetscErrorCode ierr; 2860 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2861 2862 PetscFunctionBegin; 2863 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2864 2865 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2866 for (i=0; i<mbs; i++) { 2867 c->imax[i] = a->imax[i]; 2868 c->ilen[i] = a->ilen[i]; 2869 } 2870 2871 /* allocate the matrix space */ 2872 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2873 c->singlemalloc = PETSC_TRUE; 2874 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2875 if (mbs > 0) { 2876 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2877 if (cpvalues == MAT_COPY_VALUES) { 2878 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2879 } else { 2880 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2881 } 2882 } 2883 c->roworiented = a->roworiented; 2884 c->nonew = a->nonew; 2885 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 2886 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 2887 c->bs2 = a->bs2; 2888 c->mbs = a->mbs; 2889 c->nbs = a->nbs; 2890 2891 if (a->diag) { 2892 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2893 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2894 for (i=0; i<mbs; i++) { 2895 c->diag[i] = a->diag[i]; 2896 } 2897 } else c->diag = 0; 2898 c->nz = a->nz; 2899 c->maxnz = a->maxnz; 2900 c->solve_work = 0; 2901 c->mult_work = 0; 2902 c->free_a = PETSC_TRUE; 2903 c->free_ij = PETSC_TRUE; 2904 C->preallocated = PETSC_TRUE; 2905 C->assembled = PETSC_TRUE; 2906 2907 c->compressedrow.use = a->compressedrow.use; 2908 c->compressedrow.nrows = a->compressedrow.nrows; 2909 c->compressedrow.checked = a->compressedrow.checked; 2910 if ( a->compressedrow.checked && a->compressedrow.use){ 2911 i = a->compressedrow.nrows; 2912 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2913 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2914 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2915 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2916 } else { 2917 c->compressedrow.use = PETSC_FALSE; 2918 c->compressedrow.i = PETSC_NULL; 2919 c->compressedrow.rindex = PETSC_NULL; 2920 } 2921 C->same_nonzero = A->same_nonzero; 2922 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2928 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2929 { 2930 PetscErrorCode ierr; 2931 2932 PetscFunctionBegin; 2933 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 2934 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 2935 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 2936 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues); 2937 PetscFunctionReturn(0); 2938 } 2939 2940 #undef __FUNCT__ 2941 #define __FUNCT__ "MatLoad_SeqBAIJ" 2942 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2943 { 2944 Mat_SeqBAIJ *a; 2945 Mat B; 2946 PetscErrorCode ierr; 2947 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2948 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2949 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2950 PetscInt *masked,nmask,tmp,bs2,ishift; 2951 PetscMPIInt size; 2952 int fd; 2953 PetscScalar *aa; 2954 MPI_Comm comm = ((PetscObject)viewer)->comm; 2955 2956 PetscFunctionBegin; 2957 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2958 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2959 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2960 bs2 = bs*bs; 2961 2962 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2963 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2964 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2965 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2966 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2967 M = header[1]; N = header[2]; nz = header[3]; 2968 2969 if (header[3] < 0) { 2970 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2971 } 2972 2973 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2974 2975 /* 2976 This code adds extra rows to make sure the number of rows is 2977 divisible by the blocksize 2978 */ 2979 mbs = M/bs; 2980 extra_rows = bs - M + bs*(mbs); 2981 if (extra_rows == bs) extra_rows = 0; 2982 else mbs++; 2983 if (extra_rows) { 2984 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2985 } 2986 2987 /* read in row lengths */ 2988 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2989 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2990 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2991 2992 /* read in column indices */ 2993 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2994 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2995 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2996 2997 /* loop over row lengths determining block row lengths */ 2998 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2999 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3000 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3001 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3002 masked = mask + mbs; 3003 rowcount = 0; nzcount = 0; 3004 for (i=0; i<mbs; i++) { 3005 nmask = 0; 3006 for (j=0; j<bs; j++) { 3007 kmax = rowlengths[rowcount]; 3008 for (k=0; k<kmax; k++) { 3009 tmp = jj[nzcount++]/bs; 3010 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3011 } 3012 rowcount++; 3013 } 3014 browlengths[i] += nmask; 3015 /* zero out the mask elements we set */ 3016 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3017 } 3018 3019 /* create our matrix */ 3020 ierr = MatCreate(comm,&B); 3021 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3022 ierr = MatSetType(B,type);CHKERRQ(ierr); 3023 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3024 a = (Mat_SeqBAIJ*)B->data; 3025 3026 /* set matrix "i" values */ 3027 a->i[0] = 0; 3028 for (i=1; i<= mbs; i++) { 3029 a->i[i] = a->i[i-1] + browlengths[i-1]; 3030 a->ilen[i-1] = browlengths[i-1]; 3031 } 3032 a->nz = 0; 3033 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3034 3035 /* read in nonzero values */ 3036 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3037 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3038 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3039 3040 /* set "a" and "j" values into matrix */ 3041 nzcount = 0; jcount = 0; 3042 for (i=0; i<mbs; i++) { 3043 nzcountb = nzcount; 3044 nmask = 0; 3045 for (j=0; j<bs; j++) { 3046 kmax = rowlengths[i*bs+j]; 3047 for (k=0; k<kmax; k++) { 3048 tmp = jj[nzcount++]/bs; 3049 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3050 } 3051 } 3052 /* sort the masked values */ 3053 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3054 3055 /* set "j" values into matrix */ 3056 maskcount = 1; 3057 for (j=0; j<nmask; j++) { 3058 a->j[jcount++] = masked[j]; 3059 mask[masked[j]] = maskcount++; 3060 } 3061 /* set "a" values into matrix */ 3062 ishift = bs2*a->i[i]; 3063 for (j=0; j<bs; j++) { 3064 kmax = rowlengths[i*bs+j]; 3065 for (k=0; k<kmax; k++) { 3066 tmp = jj[nzcountb]/bs ; 3067 block = mask[tmp] - 1; 3068 point = jj[nzcountb] - bs*tmp; 3069 idx = ishift + bs2*block + j + bs*point; 3070 a->a[idx] = (MatScalar)aa[nzcountb++]; 3071 } 3072 } 3073 /* zero out the mask elements we set */ 3074 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3075 } 3076 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3077 3078 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3079 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3080 ierr = PetscFree(aa);CHKERRQ(ierr); 3081 ierr = PetscFree(jj);CHKERRQ(ierr); 3082 ierr = PetscFree(mask);CHKERRQ(ierr); 3083 3084 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3085 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3086 ierr = MatView_Private(B);CHKERRQ(ierr); 3087 3088 *A = B; 3089 PetscFunctionReturn(0); 3090 } 3091 3092 #undef __FUNCT__ 3093 #define __FUNCT__ "MatCreateSeqBAIJ" 3094 /*@C 3095 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3096 compressed row) format. For good matrix assembly performance the 3097 user should preallocate the matrix storage by setting the parameter nz 3098 (or the array nnz). By setting these parameters accurately, performance 3099 during matrix assembly can be increased by more than a factor of 50. 3100 3101 Collective on MPI_Comm 3102 3103 Input Parameters: 3104 + comm - MPI communicator, set to PETSC_COMM_SELF 3105 . bs - size of block 3106 . m - number of rows 3107 . n - number of columns 3108 . nz - number of nonzero blocks per block row (same for all rows) 3109 - nnz - array containing the number of nonzero blocks in the various block rows 3110 (possibly different for each block row) or PETSC_NULL 3111 3112 Output Parameter: 3113 . A - the matrix 3114 3115 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3116 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3117 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3118 3119 Options Database Keys: 3120 . -mat_no_unroll - uses code that does not unroll the loops in the 3121 block calculations (much slower) 3122 . -mat_block_size - size of the blocks to use 3123 3124 Level: intermediate 3125 3126 Notes: 3127 The number of rows and columns must be divisible by blocksize. 3128 3129 If the nnz parameter is given then the nz parameter is ignored 3130 3131 A nonzero block is any block that as 1 or more nonzeros in it 3132 3133 The block AIJ format is fully compatible with standard Fortran 77 3134 storage. That is, the stored row and column indices can begin at 3135 either one (as in Fortran) or zero. See the users' manual for details. 3136 3137 Specify the preallocated storage with either nz or nnz (not both). 3138 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3139 allocation. For additional details, see the users manual chapter on 3140 matrices. 3141 3142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3143 @*/ 3144 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3145 { 3146 PetscErrorCode ierr; 3147 3148 PetscFunctionBegin; 3149 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3150 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3151 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3152 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3153 PetscFunctionReturn(0); 3154 } 3155 3156 #undef __FUNCT__ 3157 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3158 /*@C 3159 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3160 per row in the matrix. For good matrix assembly performance the 3161 user should preallocate the matrix storage by setting the parameter nz 3162 (or the array nnz). By setting these parameters accurately, performance 3163 during matrix assembly can be increased by more than a factor of 50. 3164 3165 Collective on MPI_Comm 3166 3167 Input Parameters: 3168 + A - the matrix 3169 . bs - size of block 3170 . nz - number of block nonzeros per block row (same for all rows) 3171 - nnz - array containing the number of block nonzeros in the various block rows 3172 (possibly different for each block row) or PETSC_NULL 3173 3174 Options Database Keys: 3175 . -mat_no_unroll - uses code that does not unroll the loops in the 3176 block calculations (much slower) 3177 . -mat_block_size - size of the blocks to use 3178 3179 Level: intermediate 3180 3181 Notes: 3182 If the nnz parameter is given then the nz parameter is ignored 3183 3184 You can call MatGetInfo() to get information on how effective the preallocation was; 3185 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3186 You can also run with the option -info and look for messages with the string 3187 malloc in them to see if additional memory allocation was needed. 3188 3189 The block AIJ format is fully compatible with standard Fortran 77 3190 storage. That is, the stored row and column indices can begin at 3191 either one (as in Fortran) or zero. See the users' manual for details. 3192 3193 Specify the preallocated storage with either nz or nnz (not both). 3194 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3195 allocation. For additional details, see the users manual chapter on 3196 matrices. 3197 3198 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3199 @*/ 3200 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3201 { 3202 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3203 3204 PetscFunctionBegin; 3205 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3206 if (f) { 3207 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3208 } 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3214 /*@C 3215 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3216 (the default sequential PETSc format). 3217 3218 Collective on MPI_Comm 3219 3220 Input Parameters: 3221 + A - the matrix 3222 . i - the indices into j for the start of each local row (starts with zero) 3223 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3224 - v - optional values in the matrix 3225 3226 Level: developer 3227 3228 .keywords: matrix, aij, compressed row, sparse 3229 3230 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3231 @*/ 3232 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3233 { 3234 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3235 3236 PetscFunctionBegin; 3237 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3238 if (f) { 3239 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3240 } 3241 PetscFunctionReturn(0); 3242 } 3243 3244 3245 #undef __FUNCT__ 3246 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3247 /*@ 3248 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3249 (upper triangular entries in CSR format) provided by the user. 3250 3251 Collective on MPI_Comm 3252 3253 Input Parameters: 3254 + comm - must be an MPI communicator of size 1 3255 . bs - size of block 3256 . m - number of rows 3257 . n - number of columns 3258 . i - row indices 3259 . j - column indices 3260 - a - matrix values 3261 3262 Output Parameter: 3263 . mat - the matrix 3264 3265 Level: intermediate 3266 3267 Notes: 3268 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3269 once the matrix is destroyed 3270 3271 You cannot set new nonzero locations into this matrix, that will generate an error. 3272 3273 The i and j indices are 0 based 3274 3275 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3276 3277 @*/ 3278 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3279 { 3280 PetscErrorCode ierr; 3281 PetscInt ii; 3282 Mat_SeqBAIJ *baij; 3283 3284 PetscFunctionBegin; 3285 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3286 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3287 3288 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3289 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3290 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3291 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3292 baij = (Mat_SeqBAIJ*)(*mat)->data; 3293 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3294 3295 baij->i = i; 3296 baij->j = j; 3297 baij->a = a; 3298 baij->singlemalloc = PETSC_FALSE; 3299 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3300 baij->free_a = PETSC_FALSE; 3301 baij->free_ij = PETSC_FALSE; 3302 3303 for (ii=0; ii<m; ii++) { 3304 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3305 #if defined(PETSC_USE_DEBUG) 3306 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3307 #endif 3308 } 3309 #if defined(PETSC_USE_DEBUG) 3310 for (ii=0; ii<baij->i[m]; ii++) { 3311 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3312 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3313 } 3314 #endif 3315 3316 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3317 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320