1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "../src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "../src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 default: 110 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 111 } 112 a->idiagvalid = PETSC_TRUE; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 120 { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 122 PetscScalar *x,x1,s1; 123 const PetscScalar *b; 124 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 125 PetscErrorCode ierr; 126 PetscInt m = a->mbs,i,i2,nz,idx; 127 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 128 129 PetscFunctionBegin; 130 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 131 its = its*lits; 132 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 133 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 134 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 135 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 136 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 137 138 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 139 140 diag = a->diag; 141 idiag = a->idiag; 142 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 143 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 144 145 if (flag & SOR_ZERO_INITIAL_GUESS) { 146 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 147 x[0] = b[0]*idiag[0]; 148 i2 = 1; 149 idiag += 1; 150 for (i=1; i<m; i++) { 151 v = aa + ai[i]; 152 vi = aj + ai[i]; 153 nz = diag[i] - ai[i]; 154 s1 = b[i2]; 155 while (nz--) { 156 idx = (*vi++); 157 x1 = x[idx]; 158 s1 -= v[0]*x1; 159 v += 1; 160 } 161 x[i2] = idiag[0]*s1; 162 idiag += 1; 163 i2 += 1; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; 174 x[i2] = mdiag[0]*x1; 175 mdiag += 1; 176 i2 += 1; 177 } 178 ierr = PetscLogFlops(m);CHKERRQ(ierr); 179 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 180 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 183 idiag = a->idiag+a->mbs - 1; 184 i2 = m - 1; 185 x1 = x[i2]; 186 x[i2] = idiag[0]*x1; 187 idiag -= 1; 188 i2 -= 1; 189 for (i=m-2; i>=0; i--) { 190 v = aa + (diag[i]+1); 191 vi = aj + diag[i] + 1; 192 nz = ai[i+1] - diag[i] - 1; 193 s1 = x[i2]; 194 while (nz--) { 195 idx = (*vi++); 196 x1 = x[idx]; 197 s1 -= v[0]*x1; 198 v += 1; 199 } 200 x[i2] = idiag[0]*s1; 201 idiag -= 2; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else { 207 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 208 } 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 PetscScalar *x,x1,x2,s1,s2; 220 const PetscScalar *b; 221 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 222 PetscErrorCode ierr; 223 PetscInt m = a->mbs,i,i2,nz,idx; 224 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225 226 PetscFunctionBegin; 227 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 228 its = its*lits; 229 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234 235 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 246 i2 = 2; 247 idiag += 4; 248 for (i=1; i<m; i++) { 249 v = aa + 4*ai[i]; 250 vi = aj + ai[i]; 251 nz = diag[i] - ai[i]; 252 s1 = b[i2]; s2 = b[i2+1]; 253 while (nz--) { 254 idx = 2*(*vi++); 255 x1 = x[idx]; x2 = x[1+idx]; 256 s1 -= v[0]*x1 + v[2]*x2; 257 s2 -= v[1]*x1 + v[3]*x2; 258 v += 4; 259 } 260 x[i2] = idiag[0]*s1 + idiag[2]*s2; 261 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 262 idiag += 4; 263 i2 += 2; 264 } 265 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 266 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 267 } 268 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 269 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 270 i2 = 0; 271 mdiag = a->idiag+4*a->mbs; 272 for (i=0; i<m; i++) { 273 x1 = x[i2]; x2 = x[i2+1]; 274 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 275 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 276 mdiag += 4; 277 i2 += 2; 278 } 279 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 280 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 281 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 282 } 283 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 284 idiag = a->idiag+4*a->mbs - 4; 285 i2 = 2*m - 2; 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = idiag[0]*x1 + idiag[2]*x2; 288 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 289 idiag -= 4; 290 i2 -= 2; 291 for (i=m-2; i>=0; i--) { 292 v = aa + 4*(diag[i]+1); 293 vi = aj + diag[i] + 1; 294 nz = ai[i+1] - diag[i] - 1; 295 s1 = x[i2]; s2 = x[i2+1]; 296 while (nz--) { 297 idx = 2*(*vi++); 298 x1 = x[idx]; x2 = x[1+idx]; 299 s1 -= v[0]*x1 + v[2]*x2; 300 s2 -= v[1]*x1 + v[3]*x2; 301 v += 4; 302 } 303 x[i2] = idiag[0]*s1 + idiag[2]*s2; 304 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 305 idiag -= 4; 306 i2 -= 2; 307 } 308 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 312 } 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 420 } 421 } else { 422 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 423 } 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 435 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 436 const PetscScalar *b; 437 PetscErrorCode ierr; 438 PetscInt m = a->mbs,i,i2,nz,idx; 439 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 440 441 PetscFunctionBegin; 442 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 443 its = its*lits; 444 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 445 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 446 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 447 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 448 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 449 450 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 451 452 diag = a->diag; 453 idiag = a->idiag; 454 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 455 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 456 457 if (flag & SOR_ZERO_INITIAL_GUESS) { 458 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 459 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 460 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 461 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 462 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 463 i2 = 4; 464 idiag += 16; 465 for (i=1; i<m; i++) { 466 v = aa + 16*ai[i]; 467 vi = aj + ai[i]; 468 nz = diag[i] - ai[i]; 469 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 473 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 474 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 475 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 476 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 477 v += 16; 478 } 479 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 480 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 481 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 482 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 483 idiag += 16; 484 i2 += 4; 485 } 486 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 487 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 488 } 489 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 490 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 491 i2 = 0; 492 mdiag = a->idiag+16*a->mbs; 493 for (i=0; i<m; i++) { 494 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 495 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 496 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 497 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 498 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 499 mdiag += 16; 500 i2 += 4; 501 } 502 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 503 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 504 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 505 } 506 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 507 idiag = a->idiag+16*a->mbs - 16; 508 i2 = 4*m - 4; 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 510 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 511 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 512 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 513 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 514 idiag -= 16; 515 i2 -= 4; 516 for (i=m-2; i>=0; i--) { 517 v = aa + 16*(diag[i]+1); 518 vi = aj + diag[i] + 1; 519 nz = ai[i+1] - diag[i] - 1; 520 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 521 while (nz--) { 522 idx = 4*(*vi++); 523 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 524 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 525 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 526 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 527 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 528 v += 16; 529 } 530 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 531 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 532 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 533 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 534 idiag -= 16; 535 i2 -= 4; 536 } 537 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 538 } 539 } else { 540 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 541 } 542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 550 { 551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 552 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 553 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 554 const PetscScalar *b; 555 PetscErrorCode ierr; 556 PetscInt m = a->mbs,i,i2,nz,idx; 557 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 558 559 PetscFunctionBegin; 560 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 561 its = its*lits; 562 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 563 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 564 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 565 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 566 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 567 568 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 569 570 diag = a->diag; 571 idiag = a->idiag; 572 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 574 575 if (flag & SOR_ZERO_INITIAL_GUESS) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 577 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 578 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 579 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 580 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 581 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 582 i2 = 5; 583 idiag += 25; 584 for (i=1; i<m; i++) { 585 v = aa + 25*ai[i]; 586 vi = aj + ai[i]; 587 nz = diag[i] - ai[i]; 588 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 589 while (nz--) { 590 idx = 5*(*vi++); 591 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 592 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 593 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 594 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 595 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 596 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 597 v += 25; 598 } 599 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 600 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 601 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 602 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 603 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 604 idiag += 25; 605 i2 += 5; 606 } 607 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 608 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 609 } 610 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 611 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 612 i2 = 0; 613 mdiag = a->idiag+25*a->mbs; 614 for (i=0; i<m; i++) { 615 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 616 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 617 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 618 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 619 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 620 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 621 mdiag += 25; 622 i2 += 5; 623 } 624 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 625 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 626 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 627 } 628 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 629 idiag = a->idiag+25*a->mbs - 25; 630 i2 = 5*m - 5; 631 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 632 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 633 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 634 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 635 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 636 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 637 idiag -= 25; 638 i2 -= 5; 639 for (i=m-2; i>=0; i--) { 640 v = aa + 25*(diag[i]+1); 641 vi = aj + diag[i] + 1; 642 nz = ai[i+1] - diag[i] - 1; 643 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 644 while (nz--) { 645 idx = 5*(*vi++); 646 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 647 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 648 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 649 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 650 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 651 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 652 v += 25; 653 } 654 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 655 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 656 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 657 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 658 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 659 idiag -= 25; 660 i2 -= 5; 661 } 662 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 663 } 664 } else { 665 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 666 } 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 675 { 676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 677 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 678 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 679 const PetscScalar *b; 680 PetscErrorCode ierr; 681 PetscInt m = a->mbs,i,i2,nz,idx; 682 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 683 684 PetscFunctionBegin; 685 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 686 its = its*lits; 687 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 688 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 689 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 690 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 691 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 692 693 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 694 695 diag = a->diag; 696 idiag = a->idiag; 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 699 700 if (flag & SOR_ZERO_INITIAL_GUESS) { 701 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 702 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 703 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 704 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 705 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 706 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 707 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 708 i2 = 6; 709 idiag += 36; 710 for (i=1; i<m; i++) { 711 v = aa + 36*ai[i]; 712 vi = aj + ai[i]; 713 nz = diag[i] - ai[i]; 714 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 715 while (nz--) { 716 idx = 6*(*vi++); 717 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 718 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 719 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 720 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 721 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 722 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 723 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 724 v += 36; 725 } 726 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 727 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 728 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 729 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 730 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 731 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 732 idiag += 36; 733 i2 += 6; 734 } 735 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 736 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 737 } 738 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 739 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 740 i2 = 0; 741 mdiag = a->idiag+36*a->mbs; 742 for (i=0; i<m; i++) { 743 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 744 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 745 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 746 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 747 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 748 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 749 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 750 mdiag += 36; 751 i2 += 6; 752 } 753 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 754 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 755 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 756 } 757 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 758 idiag = a->idiag+36*a->mbs - 36; 759 i2 = 6*m - 6; 760 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 761 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 762 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 763 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 764 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 765 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 766 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 767 idiag -= 36; 768 i2 -= 6; 769 for (i=m-2; i>=0; i--) { 770 v = aa + 36*(diag[i]+1); 771 vi = aj + diag[i] + 1; 772 nz = ai[i+1] - diag[i] - 1; 773 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 774 while (nz--) { 775 idx = 6*(*vi++); 776 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 777 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 778 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 779 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 780 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 781 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 782 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 783 v += 36; 784 } 785 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 786 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 787 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 788 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 789 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 790 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 791 idiag -= 36; 792 i2 -= 6; 793 } 794 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 795 } 796 } else { 797 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 798 } 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 810 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 811 const PetscScalar *b; 812 PetscErrorCode ierr; 813 PetscInt m = a->mbs,i,i2,nz,idx; 814 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 815 816 PetscFunctionBegin; 817 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 818 its = its*lits; 819 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 820 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 821 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 822 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 823 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 824 825 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 826 827 diag = a->diag; 828 idiag = a->idiag; 829 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 830 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 831 832 if (flag & SOR_ZERO_INITIAL_GUESS) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 834 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 835 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 836 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 837 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 838 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 839 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 840 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 841 i2 = 7; 842 idiag += 49; 843 for (i=1; i<m; i++) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = diag[i] - ai[i]; 847 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 848 while (nz--) { 849 idx = 7*(*vi++); 850 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 851 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 852 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 853 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 854 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 855 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 856 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 857 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 858 v += 49; 859 } 860 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 861 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 862 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 863 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 864 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 865 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 866 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 867 idiag += 49; 868 i2 += 7; 869 } 870 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 871 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 872 } 873 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 874 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 875 i2 = 0; 876 mdiag = a->idiag+49*a->mbs; 877 for (i=0; i<m; i++) { 878 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 879 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 880 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 881 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 882 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 883 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 884 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 885 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 886 mdiag += 36; 887 i2 += 6; 888 } 889 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 890 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 891 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 894 idiag = a->idiag+49*a->mbs - 49; 895 i2 = 7*m - 7; 896 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 897 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 898 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 899 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 900 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 901 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 902 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 903 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 904 idiag -= 49; 905 i2 -= 7; 906 for (i=m-2; i>=0; i--) { 907 v = aa + 49*(diag[i]+1); 908 vi = aj + diag[i] + 1; 909 nz = ai[i+1] - diag[i] - 1; 910 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 911 while (nz--) { 912 idx = 7*(*vi++); 913 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 914 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 915 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 916 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 917 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 918 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 919 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 920 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 921 v += 49; 922 } 923 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 924 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 925 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 926 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 927 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 928 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 929 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 930 idiag -= 49; 931 i2 -= 7; 932 } 933 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 934 } 935 } else { 936 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 937 } 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /* 944 Special version for direct calls from Fortran (Used in PETSc-fun3d) 945 */ 946 #if defined(PETSC_HAVE_FORTRAN_CAPS) 947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 949 #define matsetvaluesblocked4_ matsetvaluesblocked4 950 #endif 951 952 EXTERN_C_BEGIN 953 #undef __FUNCT__ 954 #define __FUNCT__ "matsetvaluesblocked4_" 955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 956 { 957 Mat A = *AA; 958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 959 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 960 PetscInt *ai=a->i,*ailen=a->ilen; 961 PetscInt *aj=a->j,stepval,lastcol = -1; 962 const PetscScalar *value = v; 963 MatScalar *ap,*aa = a->a,*bap; 964 965 PetscFunctionBegin; 966 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 967 stepval = (n-1)*4; 968 for (k=0; k<m; k++) { /* loop over added rows */ 969 row = im[k]; 970 rp = aj + ai[row]; 971 ap = aa + 16*ai[row]; 972 nrow = ailen[row]; 973 low = 0; 974 high = nrow; 975 for (l=0; l<n; l++) { /* loop over added columns */ 976 col = in[l]; 977 if (col <= lastcol) low = 0; else high = nrow; 978 lastcol = col; 979 value = v + k*(stepval+4 + l)*4; 980 while (high-low > 7) { 981 t = (low+high)/2; 982 if (rp[t] > col) high = t; 983 else low = t; 984 } 985 for (i=low; i<high; i++) { 986 if (rp[i] > col) break; 987 if (rp[i] == col) { 988 bap = ap + 16*i; 989 for (ii=0; ii<4; ii++,value+=stepval) { 990 for (jj=ii; jj<16; jj+=4) { 991 bap[jj] += *value++; 992 } 993 } 994 goto noinsert2; 995 } 996 } 997 N = nrow++ - 1; 998 high++; /* added new column index thus must search to one higher than before */ 999 /* shift up all the later entries in this row */ 1000 for (ii=N; ii>=i; ii--) { 1001 rp[ii+1] = rp[ii]; 1002 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1003 } 1004 if (N >= i) { 1005 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1006 } 1007 rp[i] = col; 1008 bap = ap + 16*i; 1009 for (ii=0; ii<4; ii++,value+=stepval) { 1010 for (jj=ii; jj<16; jj+=4) { 1011 bap[jj] = *value++; 1012 } 1013 } 1014 noinsert2:; 1015 low = i; 1016 } 1017 ailen[row] = nrow; 1018 } 1019 PetscFunctionReturnVoid(); 1020 } 1021 EXTERN_C_END 1022 1023 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1024 #define matsetvalues4_ MATSETVALUES4 1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1026 #define matsetvalues4_ matsetvalues4 1027 #endif 1028 1029 EXTERN_C_BEGIN 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSetValues4_" 1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1033 { 1034 Mat A = *AA; 1035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1036 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1037 PetscInt *ai=a->i,*ailen=a->ilen; 1038 PetscInt *aj=a->j,brow,bcol; 1039 PetscInt ridx,cidx,lastcol = -1; 1040 MatScalar *ap,value,*aa=a->a,*bap; 1041 1042 PetscFunctionBegin; 1043 for (k=0; k<m; k++) { /* loop over added rows */ 1044 row = im[k]; brow = row/4; 1045 rp = aj + ai[brow]; 1046 ap = aa + 16*ai[brow]; 1047 nrow = ailen[brow]; 1048 low = 0; 1049 high = nrow; 1050 for (l=0; l<n; l++) { /* loop over added columns */ 1051 col = in[l]; bcol = col/4; 1052 ridx = row % 4; cidx = col % 4; 1053 value = v[l + k*n]; 1054 if (col <= lastcol) low = 0; else high = nrow; 1055 lastcol = col; 1056 while (high-low > 7) { 1057 t = (low+high)/2; 1058 if (rp[t] > bcol) high = t; 1059 else low = t; 1060 } 1061 for (i=low; i<high; i++) { 1062 if (rp[i] > bcol) break; 1063 if (rp[i] == bcol) { 1064 bap = ap + 16*i + 4*cidx + ridx; 1065 *bap += value; 1066 goto noinsert1; 1067 } 1068 } 1069 N = nrow++ - 1; 1070 high++; /* added new column thus must search to one higher than before */ 1071 /* shift up all the later entries in this row */ 1072 for (ii=N; ii>=i; ii--) { 1073 rp[ii+1] = rp[ii]; 1074 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1075 } 1076 if (N>=i) { 1077 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1078 } 1079 rp[i] = bcol; 1080 ap[16*i + 4*cidx + ridx] = value; 1081 noinsert1:; 1082 low = i; 1083 } 1084 ailen[brow] = nrow; 1085 } 1086 PetscFunctionReturnVoid(); 1087 } 1088 EXTERN_C_END 1089 1090 /* 1091 Checks for missing diagonals 1092 */ 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscInt *diag,*jj = a->j,i; 1100 1101 PetscFunctionBegin; 1102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1103 *missing = PETSC_FALSE; 1104 diag = a->diag; 1105 for (i=0; i<a->mbs; i++) { 1106 if (jj[diag[i]] != i) { 1107 *missing = PETSC_TRUE; 1108 if (d) *d = i; 1109 } 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscInt i,j,m = a->mbs; 1121 1122 PetscFunctionBegin; 1123 if (!a->diag) { 1124 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1125 } 1126 for (i=0; i<m; i++) { 1127 a->diag[i] = a->i[i+1]; 1128 for (j=a->i[i]; j<a->i[i+1]; j++) { 1129 if (a->j[j] == i) { 1130 a->diag[i] = j; 1131 break; 1132 } 1133 } 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1148 PetscInt *tia, *tja; 1149 1150 PetscFunctionBegin; 1151 *nn = n; 1152 if (!ia) PetscFunctionReturn(0); 1153 if (symmetric) { 1154 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1155 } else { 1156 tia = a->i; tja = a->j; 1157 } 1158 1159 if (!blockcompressed && bs > 1) { 1160 (*nn) *= bs; 1161 nbs = bs; 1162 /* malloc & create the natural set of indices */ 1163 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1164 if (n) { 1165 (*ia)[0] = 0; 1166 for (j=1; j<bs; j++) { 1167 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1168 } 1169 } 1170 1171 for (i=1; i<n; i++) { 1172 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1173 for (j=1; j<bs; j++) { 1174 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1175 } 1176 } 1177 if (n) { 1178 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1179 } 1180 1181 if (ja) { 1182 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1183 cnt = 0; 1184 for (i=0; i<n; i++) { 1185 for (j=0; j<bs; j++) { 1186 for (k=tia[i]; k<tia[i+1]; k++) { 1187 for (l=0; l<bs; l++) { 1188 (*ja)[cnt++] = bs*tja[k] + l; 1189 } 1190 } 1191 } 1192 } 1193 } 1194 1195 n *= bs; 1196 nz *= bs*bs; 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 ierr = PetscFree(tia);CHKERRQ(ierr); 1199 ierr = PetscFree(tja);CHKERRQ(ierr); 1200 } 1201 } else { 1202 *ia = tia; 1203 if (ja) *ja = tja; 1204 } 1205 if (oshift == 1) { 1206 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1207 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt i,n = a->mbs,nz = a->i[n]; 1219 1220 PetscFunctionBegin; 1221 if (!ia) PetscFunctionReturn(0); 1222 if (!blockcompressed && A->rmap->bs > 1) { 1223 ierr = PetscFree(*ia);CHKERRQ(ierr); 1224 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1225 } else if (symmetric) { 1226 ierr = PetscFree(*ia);CHKERRQ(ierr); 1227 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1228 } else if (oshift == 1) { /* blockcompressed */ 1229 for (i=0; i<n+1; i++) a->i[i]--; 1230 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1238 { 1239 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 #if defined(PETSC_USE_LOG) 1244 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1245 #endif 1246 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1247 if (a->row) { 1248 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1249 } 1250 if (a->col) { 1251 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1252 } 1253 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1254 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1255 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1256 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1257 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1258 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1259 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1260 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1261 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1262 1263 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1264 ierr = PetscFree(a);CHKERRQ(ierr); 1265 1266 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1280 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1281 { 1282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1283 PetscErrorCode ierr; 1284 1285 PetscFunctionBegin; 1286 switch (op) { 1287 case MAT_ROW_ORIENTED: 1288 a->roworiented = flg; 1289 break; 1290 case MAT_KEEP_ZEROED_ROWS: 1291 a->keepzeroedrows = flg; 1292 break; 1293 case MAT_NEW_NONZERO_LOCATIONS: 1294 a->nonew = (flg ? 0 : 1); 1295 break; 1296 case MAT_NEW_NONZERO_LOCATION_ERR: 1297 a->nonew = (flg ? -1 : 0); 1298 break; 1299 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1300 a->nonew = (flg ? -2 : 0); 1301 break; 1302 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1303 a->nounused = (flg ? -1 : 0); 1304 break; 1305 case MAT_NEW_DIAGONALS: 1306 case MAT_IGNORE_OFF_PROC_ENTRIES: 1307 case MAT_USE_HASH_TABLE: 1308 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1309 break; 1310 case MAT_SYMMETRIC: 1311 case MAT_STRUCTURALLY_SYMMETRIC: 1312 case MAT_HERMITIAN: 1313 case MAT_SYMMETRY_ETERNAL: 1314 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1315 break; 1316 default: 1317 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1325 { 1326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1329 MatScalar *aa,*aa_i; 1330 PetscScalar *v_i; 1331 1332 PetscFunctionBegin; 1333 bs = A->rmap->bs; 1334 ai = a->i; 1335 aj = a->j; 1336 aa = a->a; 1337 bs2 = a->bs2; 1338 1339 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1340 1341 bn = row/bs; /* Block number */ 1342 bp = row % bs; /* Block Position */ 1343 M = ai[bn+1] - ai[bn]; 1344 *nz = bs*M; 1345 1346 if (v) { 1347 *v = 0; 1348 if (*nz) { 1349 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1350 for (i=0; i<M; i++) { /* for each block in the block row */ 1351 v_i = *v + i*bs; 1352 aa_i = aa + bs2*(ai[bn] + i); 1353 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1354 } 1355 } 1356 } 1357 1358 if (idx) { 1359 *idx = 0; 1360 if (*nz) { 1361 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1362 for (i=0; i<M; i++) { /* for each block in the block row */ 1363 idx_i = *idx + i*bs; 1364 itmp = bs*aj[ai[bn] + i]; 1365 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1366 } 1367 } 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1374 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1375 { 1376 PetscErrorCode ierr; 1377 1378 PetscFunctionBegin; 1379 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1380 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1381 PetscFunctionReturn(0); 1382 } 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1386 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1387 { 1388 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1389 Mat C; 1390 PetscErrorCode ierr; 1391 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1392 PetscInt *rows,*cols,bs2=a->bs2; 1393 MatScalar *array; 1394 1395 PetscFunctionBegin; 1396 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1397 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1398 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1399 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1400 1401 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1402 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1403 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1404 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1405 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1406 ierr = PetscFree(col);CHKERRQ(ierr); 1407 } else { 1408 C = *B; 1409 } 1410 1411 array = a->a; 1412 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1413 cols = rows + bs; 1414 for (i=0; i<mbs; i++) { 1415 cols[0] = i*bs; 1416 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1417 len = ai[i+1] - ai[i]; 1418 for (j=0; j<len; j++) { 1419 rows[0] = (*aj++)*bs; 1420 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1421 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1422 array += bs2; 1423 } 1424 } 1425 ierr = PetscFree(rows);CHKERRQ(ierr); 1426 1427 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 1430 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1431 *B = C; 1432 } else { 1433 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1434 } 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1440 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1441 { 1442 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1443 PetscErrorCode ierr; 1444 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1445 int fd; 1446 PetscScalar *aa; 1447 FILE *file; 1448 1449 PetscFunctionBegin; 1450 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1451 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1452 col_lens[0] = MAT_FILE_COOKIE; 1453 1454 col_lens[1] = A->rmap->N; 1455 col_lens[2] = A->cmap->n; 1456 col_lens[3] = a->nz*bs2; 1457 1458 /* store lengths of each row and write (including header) to file */ 1459 count = 0; 1460 for (i=0; i<a->mbs; i++) { 1461 for (j=0; j<bs; j++) { 1462 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1463 } 1464 } 1465 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1466 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1467 1468 /* store column indices (zero start index) */ 1469 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1470 count = 0; 1471 for (i=0; i<a->mbs; i++) { 1472 for (j=0; j<bs; j++) { 1473 for (k=a->i[i]; k<a->i[i+1]; k++) { 1474 for (l=0; l<bs; l++) { 1475 jj[count++] = bs*a->j[k] + l; 1476 } 1477 } 1478 } 1479 } 1480 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1481 ierr = PetscFree(jj);CHKERRQ(ierr); 1482 1483 /* store nonzero values */ 1484 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1485 count = 0; 1486 for (i=0; i<a->mbs; i++) { 1487 for (j=0; j<bs; j++) { 1488 for (k=a->i[i]; k<a->i[i+1]; k++) { 1489 for (l=0; l<bs; l++) { 1490 aa[count++] = a->a[bs2*k + l*bs + j]; 1491 } 1492 } 1493 } 1494 } 1495 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscFree(aa);CHKERRQ(ierr); 1497 1498 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1499 if (file) { 1500 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1501 } 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1507 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1508 { 1509 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1510 PetscErrorCode ierr; 1511 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1512 PetscViewerFormat format; 1513 1514 PetscFunctionBegin; 1515 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1516 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1517 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1518 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1519 Mat aij; 1520 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1521 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1522 ierr = MatDestroy(aij);CHKERRQ(ierr); 1523 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1524 PetscFunctionReturn(0); 1525 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1526 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1527 for (i=0; i<a->mbs; i++) { 1528 for (j=0; j<bs; j++) { 1529 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1530 for (k=a->i[i]; k<a->i[i+1]; k++) { 1531 for (l=0; l<bs; l++) { 1532 #if defined(PETSC_USE_COMPLEX) 1533 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1535 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1537 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1538 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1539 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } 1542 #else 1543 if (a->a[bs2*k + l*bs + j] != 0.0) { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1545 } 1546 #endif 1547 } 1548 } 1549 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1550 } 1551 } 1552 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1553 } else { 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1555 for (i=0; i<a->mbs; i++) { 1556 for (j=0; j<bs; j++) { 1557 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1558 for (k=a->i[i]; k<a->i[i+1]; k++) { 1559 for (l=0; l<bs; l++) { 1560 #if defined(PETSC_USE_COMPLEX) 1561 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1562 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1563 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1564 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1565 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1566 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1567 } else { 1568 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } 1570 #else 1571 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1572 #endif 1573 } 1574 } 1575 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1576 } 1577 } 1578 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1579 } 1580 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1586 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1587 { 1588 Mat A = (Mat) Aa; 1589 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1590 PetscErrorCode ierr; 1591 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1592 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1593 MatScalar *aa; 1594 PetscViewer viewer; 1595 1596 PetscFunctionBegin; 1597 1598 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1599 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1600 1601 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1602 1603 /* loop over matrix elements drawing boxes */ 1604 color = PETSC_DRAW_BLUE; 1605 for (i=0,row=0; i<mbs; i++,row+=bs) { 1606 for (j=a->i[i]; j<a->i[i+1]; j++) { 1607 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1608 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1609 aa = a->a + j*bs2; 1610 for (k=0; k<bs; k++) { 1611 for (l=0; l<bs; l++) { 1612 if (PetscRealPart(*aa++) >= 0.) continue; 1613 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1614 } 1615 } 1616 } 1617 } 1618 color = PETSC_DRAW_CYAN; 1619 for (i=0,row=0; i<mbs; i++,row+=bs) { 1620 for (j=a->i[i]; j<a->i[i+1]; j++) { 1621 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1622 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1623 aa = a->a + j*bs2; 1624 for (k=0; k<bs; k++) { 1625 for (l=0; l<bs; l++) { 1626 if (PetscRealPart(*aa++) != 0.) continue; 1627 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1628 } 1629 } 1630 } 1631 } 1632 1633 color = PETSC_DRAW_RED; 1634 for (i=0,row=0; i<mbs; i++,row+=bs) { 1635 for (j=a->i[i]; j<a->i[i+1]; j++) { 1636 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1637 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1638 aa = a->a + j*bs2; 1639 for (k=0; k<bs; k++) { 1640 for (l=0; l<bs; l++) { 1641 if (PetscRealPart(*aa++) <= 0.) continue; 1642 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1643 } 1644 } 1645 } 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1652 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1653 { 1654 PetscErrorCode ierr; 1655 PetscReal xl,yl,xr,yr,w,h; 1656 PetscDraw draw; 1657 PetscTruth isnull; 1658 1659 PetscFunctionBegin; 1660 1661 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1662 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1663 1664 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1665 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1666 xr += w; yr += h; xl = -w; yl = -h; 1667 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1668 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1669 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatView_SeqBAIJ" 1675 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1676 { 1677 PetscErrorCode ierr; 1678 PetscTruth iascii,isbinary,isdraw; 1679 1680 PetscFunctionBegin; 1681 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1682 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1683 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1684 if (iascii){ 1685 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1686 } else if (isbinary) { 1687 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1688 } else if (isdraw) { 1689 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1690 } else { 1691 Mat B; 1692 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1693 ierr = MatView(B,viewer);CHKERRQ(ierr); 1694 ierr = MatDestroy(B);CHKERRQ(ierr); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1702 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1703 { 1704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1705 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1706 PetscInt *ai = a->i,*ailen = a->ilen; 1707 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1708 MatScalar *ap,*aa = a->a; 1709 1710 PetscFunctionBegin; 1711 for (k=0; k<m; k++) { /* loop over rows */ 1712 row = im[k]; brow = row/bs; 1713 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1714 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1715 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1716 nrow = ailen[brow]; 1717 for (l=0; l<n; l++) { /* loop over columns */ 1718 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1719 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1720 col = in[l] ; 1721 bcol = col/bs; 1722 cidx = col%bs; 1723 ridx = row%bs; 1724 high = nrow; 1725 low = 0; /* assume unsorted */ 1726 while (high-low > 5) { 1727 t = (low+high)/2; 1728 if (rp[t] > bcol) high = t; 1729 else low = t; 1730 } 1731 for (i=low; i<high; i++) { 1732 if (rp[i] > bcol) break; 1733 if (rp[i] == bcol) { 1734 *v++ = ap[bs2*i+bs*cidx+ridx]; 1735 goto finished; 1736 } 1737 } 1738 *v++ = 0.0; 1739 finished:; 1740 } 1741 } 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #define CHUNKSIZE 10 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1748 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1749 { 1750 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1751 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1752 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1753 PetscErrorCode ierr; 1754 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1755 PetscTruth roworiented=a->roworiented; 1756 const PetscScalar *value = v; 1757 MatScalar *ap,*aa = a->a,*bap; 1758 1759 PetscFunctionBegin; 1760 if (roworiented) { 1761 stepval = (n-1)*bs; 1762 } else { 1763 stepval = (m-1)*bs; 1764 } 1765 for (k=0; k<m; k++) { /* loop over added rows */ 1766 row = im[k]; 1767 if (row < 0) continue; 1768 #if defined(PETSC_USE_DEBUG) 1769 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1770 #endif 1771 rp = aj + ai[row]; 1772 ap = aa + bs2*ai[row]; 1773 rmax = imax[row]; 1774 nrow = ailen[row]; 1775 low = 0; 1776 high = nrow; 1777 for (l=0; l<n; l++) { /* loop over added columns */ 1778 if (in[l] < 0) continue; 1779 #if defined(PETSC_USE_DEBUG) 1780 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1781 #endif 1782 col = in[l]; 1783 if (roworiented) { 1784 value = v + k*(stepval+bs)*bs + l*bs; 1785 } else { 1786 value = v + l*(stepval+bs)*bs + k*bs; 1787 } 1788 if (col <= lastcol) low = 0; else high = nrow; 1789 lastcol = col; 1790 while (high-low > 7) { 1791 t = (low+high)/2; 1792 if (rp[t] > col) high = t; 1793 else low = t; 1794 } 1795 for (i=low; i<high; i++) { 1796 if (rp[i] > col) break; 1797 if (rp[i] == col) { 1798 bap = ap + bs2*i; 1799 if (roworiented) { 1800 if (is == ADD_VALUES) { 1801 for (ii=0; ii<bs; ii++,value+=stepval) { 1802 for (jj=ii; jj<bs2; jj+=bs) { 1803 bap[jj] += *value++; 1804 } 1805 } 1806 } else { 1807 for (ii=0; ii<bs; ii++,value+=stepval) { 1808 for (jj=ii; jj<bs2; jj+=bs) { 1809 bap[jj] = *value++; 1810 } 1811 } 1812 } 1813 } else { 1814 if (is == ADD_VALUES) { 1815 for (ii=0; ii<bs; ii++,value+=stepval) { 1816 for (jj=0; jj<bs; jj++) { 1817 *bap++ += *value++; 1818 } 1819 } 1820 } else { 1821 for (ii=0; ii<bs; ii++,value+=stepval) { 1822 for (jj=0; jj<bs; jj++) { 1823 *bap++ = *value++; 1824 } 1825 } 1826 } 1827 } 1828 goto noinsert2; 1829 } 1830 } 1831 if (nonew == 1) goto noinsert2; 1832 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1833 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1834 N = nrow++ - 1; high++; 1835 /* shift up all the later entries in this row */ 1836 for (ii=N; ii>=i; ii--) { 1837 rp[ii+1] = rp[ii]; 1838 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1839 } 1840 if (N >= i) { 1841 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1842 } 1843 rp[i] = col; 1844 bap = ap + bs2*i; 1845 if (roworiented) { 1846 for (ii=0; ii<bs; ii++,value+=stepval) { 1847 for (jj=ii; jj<bs2; jj+=bs) { 1848 bap[jj] = *value++; 1849 } 1850 } 1851 } else { 1852 for (ii=0; ii<bs; ii++,value+=stepval) { 1853 for (jj=0; jj<bs; jj++) { 1854 *bap++ = *value++; 1855 } 1856 } 1857 } 1858 noinsert2:; 1859 low = i; 1860 } 1861 ailen[row] = nrow; 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1868 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1869 { 1870 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1871 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1872 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1873 PetscErrorCode ierr; 1874 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1875 MatScalar *aa = a->a,*ap; 1876 PetscReal ratio=0.6; 1877 1878 PetscFunctionBegin; 1879 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1880 1881 if (m) rmax = ailen[0]; 1882 for (i=1; i<mbs; i++) { 1883 /* move each row back by the amount of empty slots (fshift) before it*/ 1884 fshift += imax[i-1] - ailen[i-1]; 1885 rmax = PetscMax(rmax,ailen[i]); 1886 if (fshift) { 1887 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1888 N = ailen[i]; 1889 for (j=0; j<N; j++) { 1890 ip[j-fshift] = ip[j]; 1891 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1892 } 1893 } 1894 ai[i] = ai[i-1] + ailen[i-1]; 1895 } 1896 if (mbs) { 1897 fshift += imax[mbs-1] - ailen[mbs-1]; 1898 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1899 } 1900 /* reset ilen and imax for each row */ 1901 for (i=0; i<mbs; i++) { 1902 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1903 } 1904 a->nz = ai[mbs]; 1905 1906 /* diagonals may have moved, so kill the diagonal pointers */ 1907 a->idiagvalid = PETSC_FALSE; 1908 if (fshift && a->diag) { 1909 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1910 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1911 a->diag = 0; 1912 } 1913 if (fshift && a->nounused == -1) { 1914 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1915 } 1916 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1917 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1918 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1919 a->reallocs = 0; 1920 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1921 1922 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1923 if (a->compressedrow.use){ 1924 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1925 } 1926 1927 A->same_nonzero = PETSC_TRUE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 /* 1932 This function returns an array of flags which indicate the locations of contiguous 1933 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1934 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1935 Assume: sizes should be long enough to hold all the values. 1936 */ 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1939 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1940 { 1941 PetscInt i,j,k,row; 1942 PetscTruth flg; 1943 1944 PetscFunctionBegin; 1945 for (i=0,j=0; i<n; j++) { 1946 row = idx[i]; 1947 if (row%bs!=0) { /* Not the begining of a block */ 1948 sizes[j] = 1; 1949 i++; 1950 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1951 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1952 i++; 1953 } else { /* Begining of the block, so check if the complete block exists */ 1954 flg = PETSC_TRUE; 1955 for (k=1; k<bs; k++) { 1956 if (row+k != idx[i+k]) { /* break in the block */ 1957 flg = PETSC_FALSE; 1958 break; 1959 } 1960 } 1961 if (flg) { /* No break in the bs */ 1962 sizes[j] = bs; 1963 i+= bs; 1964 } else { 1965 sizes[j] = 1; 1966 i++; 1967 } 1968 } 1969 } 1970 *bs_max = j; 1971 PetscFunctionReturn(0); 1972 } 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1976 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1977 { 1978 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1979 PetscErrorCode ierr; 1980 PetscInt i,j,k,count,*rows; 1981 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1982 PetscScalar zero = 0.0; 1983 MatScalar *aa; 1984 1985 PetscFunctionBegin; 1986 /* Make a copy of the IS and sort it */ 1987 /* allocate memory for rows,sizes */ 1988 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1989 sizes = rows + is_n; 1990 1991 /* copy IS values to rows, and sort them */ 1992 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1993 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1994 if (baij->keepzeroedrows) { 1995 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1996 bs_max = is_n; 1997 A->same_nonzero = PETSC_TRUE; 1998 } else { 1999 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2000 A->same_nonzero = PETSC_FALSE; 2001 } 2002 2003 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2004 row = rows[j]; 2005 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2006 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2007 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2008 if (sizes[i] == bs && !baij->keepzeroedrows) { 2009 if (diag != 0.0) { 2010 if (baij->ilen[row/bs] > 0) { 2011 baij->ilen[row/bs] = 1; 2012 baij->j[baij->i[row/bs]] = row/bs; 2013 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2014 } 2015 /* Now insert all the diagonal values for this bs */ 2016 for (k=0; k<bs; k++) { 2017 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2018 } 2019 } else { /* (diag == 0.0) */ 2020 baij->ilen[row/bs] = 0; 2021 } /* end (diag == 0.0) */ 2022 } else { /* (sizes[i] != bs) */ 2023 #if defined (PETSC_USE_DEBUG) 2024 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2025 #endif 2026 for (k=0; k<count; k++) { 2027 aa[0] = zero; 2028 aa += bs; 2029 } 2030 if (diag != 0.0) { 2031 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2032 } 2033 } 2034 } 2035 2036 ierr = PetscFree(rows);CHKERRQ(ierr); 2037 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2043 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2044 { 2045 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2046 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2047 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2048 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2049 PetscErrorCode ierr; 2050 PetscInt ridx,cidx,bs2=a->bs2; 2051 PetscTruth roworiented=a->roworiented; 2052 MatScalar *ap,value,*aa=a->a,*bap; 2053 2054 PetscFunctionBegin; 2055 for (k=0; k<m; k++) { /* loop over added rows */ 2056 row = im[k]; 2057 brow = row/bs; 2058 if (row < 0) continue; 2059 #if defined(PETSC_USE_DEBUG) 2060 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2061 #endif 2062 rp = aj + ai[brow]; 2063 ap = aa + bs2*ai[brow]; 2064 rmax = imax[brow]; 2065 nrow = ailen[brow]; 2066 low = 0; 2067 high = nrow; 2068 for (l=0; l<n; l++) { /* loop over added columns */ 2069 if (in[l] < 0) continue; 2070 #if defined(PETSC_USE_DEBUG) 2071 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2072 #endif 2073 col = in[l]; bcol = col/bs; 2074 ridx = row % bs; cidx = col % bs; 2075 if (roworiented) { 2076 value = v[l + k*n]; 2077 } else { 2078 value = v[k + l*m]; 2079 } 2080 if (col <= lastcol) low = 0; else high = nrow; 2081 lastcol = col; 2082 while (high-low > 7) { 2083 t = (low+high)/2; 2084 if (rp[t] > bcol) high = t; 2085 else low = t; 2086 } 2087 for (i=low; i<high; i++) { 2088 if (rp[i] > bcol) break; 2089 if (rp[i] == bcol) { 2090 bap = ap + bs2*i + bs*cidx + ridx; 2091 if (is == ADD_VALUES) *bap += value; 2092 else *bap = value; 2093 goto noinsert1; 2094 } 2095 } 2096 if (nonew == 1) goto noinsert1; 2097 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2098 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2099 N = nrow++ - 1; high++; 2100 /* shift up all the later entries in this row */ 2101 for (ii=N; ii>=i; ii--) { 2102 rp[ii+1] = rp[ii]; 2103 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2104 } 2105 if (N>=i) { 2106 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2107 } 2108 rp[i] = bcol; 2109 ap[bs2*i + bs*cidx + ridx] = value; 2110 a->nz++; 2111 noinsert1:; 2112 low = i; 2113 } 2114 ailen[brow] = nrow; 2115 } 2116 A->same_nonzero = PETSC_FALSE; 2117 PetscFunctionReturn(0); 2118 } 2119 2120 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2124 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2125 { 2126 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2127 Mat outA; 2128 PetscErrorCode ierr; 2129 PetscTruth row_identity,col_identity; 2130 2131 PetscFunctionBegin; 2132 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2133 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2134 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2135 if (!row_identity || !col_identity) { 2136 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2137 } 2138 2139 outA = inA; 2140 inA->factor = MAT_FACTOR_LU; 2141 2142 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2143 2144 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2145 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2146 a->row = row; 2147 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2148 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2149 a->col = col; 2150 2151 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2152 if (a->icol) { 2153 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2154 } 2155 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2156 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2157 2158 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2159 if (!a->solve_work) { 2160 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2161 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2162 } 2163 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2164 2165 PetscFunctionReturn(0); 2166 } 2167 2168 EXTERN_C_BEGIN 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2171 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2172 { 2173 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2174 PetscInt i,nz,nbs; 2175 2176 PetscFunctionBegin; 2177 nz = baij->maxnz/baij->bs2; 2178 nbs = baij->nbs; 2179 for (i=0; i<nz; i++) { 2180 baij->j[i] = indices[i]; 2181 } 2182 baij->nz = nz; 2183 for (i=0; i<nbs; i++) { 2184 baij->ilen[i] = baij->imax[i]; 2185 } 2186 2187 PetscFunctionReturn(0); 2188 } 2189 EXTERN_C_END 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2193 /*@ 2194 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2195 in the matrix. 2196 2197 Input Parameters: 2198 + mat - the SeqBAIJ matrix 2199 - indices - the column indices 2200 2201 Level: advanced 2202 2203 Notes: 2204 This can be called if you have precomputed the nonzero structure of the 2205 matrix and want to provide it to the matrix object to improve the performance 2206 of the MatSetValues() operation. 2207 2208 You MUST have set the correct numbers of nonzeros per row in the call to 2209 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2210 2211 MUST be called before any calls to MatSetValues(); 2212 2213 @*/ 2214 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2215 { 2216 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2217 2218 PetscFunctionBegin; 2219 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2220 PetscValidPointer(indices,2); 2221 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2222 if (f) { 2223 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2224 } else { 2225 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2226 } 2227 PetscFunctionReturn(0); 2228 } 2229 2230 #undef __FUNCT__ 2231 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2232 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2233 { 2234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2235 PetscErrorCode ierr; 2236 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2237 PetscReal atmp; 2238 PetscScalar *x,zero = 0.0; 2239 MatScalar *aa; 2240 PetscInt ncols,brow,krow,kcol; 2241 2242 PetscFunctionBegin; 2243 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2244 bs = A->rmap->bs; 2245 aa = a->a; 2246 ai = a->i; 2247 aj = a->j; 2248 mbs = a->mbs; 2249 2250 ierr = VecSet(v,zero);CHKERRQ(ierr); 2251 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2252 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2253 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2254 for (i=0; i<mbs; i++) { 2255 ncols = ai[1] - ai[0]; ai++; 2256 brow = bs*i; 2257 for (j=0; j<ncols; j++){ 2258 for (kcol=0; kcol<bs; kcol++){ 2259 for (krow=0; krow<bs; krow++){ 2260 atmp = PetscAbsScalar(*aa);aa++; 2261 row = brow + krow; /* row index */ 2262 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2263 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2264 } 2265 } 2266 aj++; 2267 } 2268 } 2269 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatCopy_SeqBAIJ" 2275 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2276 { 2277 PetscErrorCode ierr; 2278 2279 PetscFunctionBegin; 2280 /* If the two matrices have the same copy implementation, use fast copy. */ 2281 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2283 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2284 2285 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2286 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2287 } 2288 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2289 } else { 2290 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2297 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2298 { 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2308 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2309 { 2310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2311 PetscFunctionBegin; 2312 *array = a->a; 2313 PetscFunctionReturn(0); 2314 } 2315 2316 #undef __FUNCT__ 2317 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2318 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2319 { 2320 PetscFunctionBegin; 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #include "petscblaslapack.h" 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2327 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2328 { 2329 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2330 PetscErrorCode ierr; 2331 PetscInt i,bs=Y->rmap->bs,j,bs2; 2332 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2333 2334 PetscFunctionBegin; 2335 if (str == SAME_NONZERO_PATTERN) { 2336 PetscScalar alpha = a; 2337 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2338 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2339 if (y->xtoy && y->XtoY != X) { 2340 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2341 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2342 } 2343 if (!y->xtoy) { /* get xtoy */ 2344 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2345 y->XtoY = X; 2346 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2347 } 2348 bs2 = bs*bs; 2349 for (i=0; i<x->nz; i++) { 2350 j = 0; 2351 while (j < bs2){ 2352 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2353 j++; 2354 } 2355 } 2356 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2357 } else { 2358 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2359 } 2360 PetscFunctionReturn(0); 2361 } 2362 2363 #undef __FUNCT__ 2364 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2365 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2366 { 2367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2368 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2369 MatScalar *aa = a->a; 2370 2371 PetscFunctionBegin; 2372 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2373 PetscFunctionReturn(0); 2374 } 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2378 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2379 { 2380 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2381 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2382 MatScalar *aa = a->a; 2383 2384 PetscFunctionBegin; 2385 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 2390 /* -------------------------------------------------------------------*/ 2391 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2392 MatGetRow_SeqBAIJ, 2393 MatRestoreRow_SeqBAIJ, 2394 MatMult_SeqBAIJ_N, 2395 /* 4*/ MatMultAdd_SeqBAIJ_N, 2396 MatMultTranspose_SeqBAIJ, 2397 MatMultTransposeAdd_SeqBAIJ, 2398 0, 2399 0, 2400 0, 2401 /*10*/ 0, 2402 MatLUFactor_SeqBAIJ, 2403 0, 2404 0, 2405 MatTranspose_SeqBAIJ, 2406 /*15*/ MatGetInfo_SeqBAIJ, 2407 MatEqual_SeqBAIJ, 2408 MatGetDiagonal_SeqBAIJ, 2409 MatDiagonalScale_SeqBAIJ, 2410 MatNorm_SeqBAIJ, 2411 /*20*/ 0, 2412 MatAssemblyEnd_SeqBAIJ, 2413 0, 2414 MatSetOption_SeqBAIJ, 2415 MatZeroEntries_SeqBAIJ, 2416 /*25*/ MatZeroRows_SeqBAIJ, 2417 0, 2418 0, 2419 0, 2420 0, 2421 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2422 0, 2423 0, 2424 MatGetArray_SeqBAIJ, 2425 MatRestoreArray_SeqBAIJ, 2426 /*35*/ MatDuplicate_SeqBAIJ, 2427 0, 2428 0, 2429 MatILUFactor_SeqBAIJ, 2430 0, 2431 /*40*/ MatAXPY_SeqBAIJ, 2432 MatGetSubMatrices_SeqBAIJ, 2433 MatIncreaseOverlap_SeqBAIJ, 2434 MatGetValues_SeqBAIJ, 2435 MatCopy_SeqBAIJ, 2436 /*45*/ 0, 2437 MatScale_SeqBAIJ, 2438 0, 2439 0, 2440 0, 2441 /*50*/ 0, 2442 MatGetRowIJ_SeqBAIJ, 2443 MatRestoreRowIJ_SeqBAIJ, 2444 0, 2445 0, 2446 /*55*/ 0, 2447 0, 2448 0, 2449 0, 2450 MatSetValuesBlocked_SeqBAIJ, 2451 /*60*/ MatGetSubMatrix_SeqBAIJ, 2452 MatDestroy_SeqBAIJ, 2453 MatView_SeqBAIJ, 2454 0, 2455 0, 2456 /*65*/ 0, 2457 0, 2458 0, 2459 0, 2460 0, 2461 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2462 0, 2463 MatConvert_Basic, 2464 0, 2465 0, 2466 /*75*/ 0, 2467 0, 2468 0, 2469 0, 2470 0, 2471 /*80*/ 0, 2472 0, 2473 0, 2474 0, 2475 MatLoad_SeqBAIJ, 2476 /*85*/ 0, 2477 0, 2478 0, 2479 0, 2480 0, 2481 /*90*/ 0, 2482 0, 2483 0, 2484 0, 2485 0, 2486 /*95*/ 0, 2487 0, 2488 0, 2489 0, 2490 0, 2491 /*100*/0, 2492 0, 2493 0, 2494 0, 2495 0, 2496 /*105*/0, 2497 MatRealPart_SeqBAIJ, 2498 MatImaginaryPart_SeqBAIJ, 2499 0, 2500 0, 2501 /*110*/0, 2502 0, 2503 0, 2504 0, 2505 MatMissingDiagonal_SeqBAIJ 2506 /*115*/ 2507 }; 2508 2509 EXTERN_C_BEGIN 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2512 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2513 { 2514 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2515 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 if (aij->nonew != 1) { 2520 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2521 } 2522 2523 /* allocate space for values if not already there */ 2524 if (!aij->saved_values) { 2525 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2526 } 2527 2528 /* copy values over */ 2529 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 EXTERN_C_END 2533 2534 EXTERN_C_BEGIN 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2537 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2538 { 2539 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2540 PetscErrorCode ierr; 2541 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2542 2543 PetscFunctionBegin; 2544 if (aij->nonew != 1) { 2545 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2546 } 2547 if (!aij->saved_values) { 2548 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2549 } 2550 2551 /* copy values over */ 2552 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2553 PetscFunctionReturn(0); 2554 } 2555 EXTERN_C_END 2556 2557 EXTERN_C_BEGIN 2558 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2559 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2560 EXTERN_C_END 2561 2562 EXTERN_C_BEGIN 2563 #undef __FUNCT__ 2564 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2565 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2566 { 2567 Mat_SeqBAIJ *b; 2568 PetscErrorCode ierr; 2569 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2570 PetscTruth flg,skipallocation = PETSC_FALSE; 2571 2572 PetscFunctionBegin; 2573 2574 if (nz == MAT_SKIP_ALLOCATION) { 2575 skipallocation = PETSC_TRUE; 2576 nz = 0; 2577 } 2578 2579 if (bs < 0) { 2580 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2581 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2582 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2583 bs = PetscAbs(bs); 2584 } 2585 if (nnz && newbs != bs) { 2586 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2587 } 2588 bs = newbs; 2589 2590 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2591 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2592 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2593 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2594 2595 B->preallocated = PETSC_TRUE; 2596 2597 mbs = B->rmap->n/bs; 2598 nbs = B->cmap->n/bs; 2599 bs2 = bs*bs; 2600 2601 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2602 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2603 } 2604 2605 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2606 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2607 if (nnz) { 2608 for (i=0; i<mbs; i++) { 2609 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2610 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2611 } 2612 } 2613 2614 b = (Mat_SeqBAIJ*)B->data; 2615 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2616 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2617 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2618 2619 if (!flg) { 2620 switch (bs) { 2621 case 1: 2622 B->ops->mult = MatMult_SeqBAIJ_1; 2623 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2624 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2625 break; 2626 case 2: 2627 B->ops->mult = MatMult_SeqBAIJ_2; 2628 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2629 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2630 break; 2631 case 3: 2632 B->ops->mult = MatMult_SeqBAIJ_3; 2633 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2634 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2635 break; 2636 case 4: 2637 B->ops->mult = MatMult_SeqBAIJ_4; 2638 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2639 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2640 break; 2641 case 5: 2642 B->ops->mult = MatMult_SeqBAIJ_5; 2643 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2644 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2645 break; 2646 case 6: 2647 B->ops->mult = MatMult_SeqBAIJ_6; 2648 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2649 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2650 break; 2651 case 7: 2652 B->ops->mult = MatMult_SeqBAIJ_7; 2653 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2654 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2655 break; 2656 default: 2657 B->ops->mult = MatMult_SeqBAIJ_N; 2658 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2659 break; 2660 } 2661 } 2662 B->rmap->bs = bs; 2663 b->mbs = mbs; 2664 b->nbs = nbs; 2665 if (!skipallocation) { 2666 if (!b->imax) { 2667 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2668 } 2669 /* b->ilen will count nonzeros in each block row so far. */ 2670 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2671 if (!nnz) { 2672 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2673 else if (nz <= 0) nz = 1; 2674 for (i=0; i<mbs; i++) b->imax[i] = nz; 2675 nz = nz*mbs; 2676 } else { 2677 nz = 0; 2678 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2679 } 2680 2681 /* allocate the matrix space */ 2682 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2683 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2684 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2685 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2686 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2687 b->singlemalloc = PETSC_TRUE; 2688 b->i[0] = 0; 2689 for (i=1; i<mbs+1; i++) { 2690 b->i[i] = b->i[i-1] + b->imax[i-1]; 2691 } 2692 b->free_a = PETSC_TRUE; 2693 b->free_ij = PETSC_TRUE; 2694 } else { 2695 b->free_a = PETSC_FALSE; 2696 b->free_ij = PETSC_FALSE; 2697 } 2698 2699 B->rmap->bs = bs; 2700 b->bs2 = bs2; 2701 b->mbs = mbs; 2702 b->nz = 0; 2703 b->maxnz = nz*bs2; 2704 B->info.nz_unneeded = (PetscReal)b->maxnz; 2705 PetscFunctionReturn(0); 2706 } 2707 EXTERN_C_END 2708 2709 EXTERN_C_BEGIN 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2712 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2713 { 2714 PetscInt i,m,nz,nz_max=0,*nnz; 2715 PetscScalar *values=0; 2716 PetscErrorCode ierr; 2717 2718 PetscFunctionBegin; 2719 2720 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2721 2722 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2723 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2724 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2725 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2726 m = B->rmap->n/bs; 2727 2728 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 2729 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 2730 for(i=0; i<m; i++) { 2731 nz = ii[i+1]- ii[i]; 2732 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 2733 nz_max = PetscMax(nz_max, nz); 2734 nnz[i] = nz; 2735 } 2736 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2737 ierr = PetscFree(nnz);CHKERRQ(ierr); 2738 2739 values = (PetscScalar*)V; 2740 if (!values) { 2741 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2742 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2743 } 2744 for (i=0; i<m; i++) { 2745 PetscInt ncols = ii[i+1] - ii[i]; 2746 const PetscInt *icols = jj + ii[i]; 2747 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2748 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2749 } 2750 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2751 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2752 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2753 2754 PetscFunctionReturn(0); 2755 } 2756 EXTERN_C_END 2757 2758 2759 EXTERN_C_BEGIN 2760 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2761 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 2762 EXTERN_C_END 2763 2764 /*MC 2765 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2766 block sparse compressed row format. 2767 2768 Options Database Keys: 2769 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2770 2771 Level: beginner 2772 2773 .seealso: MatCreateSeqBAIJ() 2774 M*/ 2775 2776 2777 EXTERN_C_BEGIN 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatCreate_SeqBAIJ" 2780 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2781 { 2782 PetscErrorCode ierr; 2783 PetscMPIInt size; 2784 Mat_SeqBAIJ *b; 2785 2786 PetscFunctionBegin; 2787 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2788 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2789 2790 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2791 B->data = (void*)b; 2792 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2793 B->mapping = 0; 2794 b->row = 0; 2795 b->col = 0; 2796 b->icol = 0; 2797 b->reallocs = 0; 2798 b->saved_values = 0; 2799 2800 b->roworiented = PETSC_TRUE; 2801 b->nonew = 0; 2802 b->diag = 0; 2803 b->solve_work = 0; 2804 b->mult_work = 0; 2805 B->spptr = 0; 2806 B->info.nz_unneeded = (PetscReal)b->maxnz; 2807 b->keepzeroedrows = PETSC_FALSE; 2808 b->xtoy = 0; 2809 b->XtoY = 0; 2810 b->compressedrow.use = PETSC_FALSE; 2811 b->compressedrow.nrows = 0; 2812 b->compressedrow.i = PETSC_NULL; 2813 b->compressedrow.rindex = PETSC_NULL; 2814 b->compressedrow.checked = PETSC_FALSE; 2815 B->same_nonzero = PETSC_FALSE; 2816 2817 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 2818 "MatGetFactorAvailable_seqbaij_petsc", 2819 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 2820 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 2821 "MatGetFactor_seqbaij_petsc", 2822 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 2823 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2824 "MatInvertBlockDiagonal_SeqBAIJ", 2825 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2826 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2827 "MatStoreValues_SeqBAIJ", 2828 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2830 "MatRetrieveValues_SeqBAIJ", 2831 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2832 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2833 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2834 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2836 "MatConvert_SeqBAIJ_SeqAIJ", 2837 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2838 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2839 "MatConvert_SeqBAIJ_SeqSBAIJ", 2840 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2842 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2843 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2844 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 2845 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 2846 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2847 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2848 PetscFunctionReturn(0); 2849 } 2850 EXTERN_C_END 2851 2852 #undef __FUNCT__ 2853 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 2854 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues) 2855 { 2856 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2857 PetscErrorCode ierr; 2858 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2859 2860 PetscFunctionBegin; 2861 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2862 2863 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2864 for (i=0; i<mbs; i++) { 2865 c->imax[i] = a->imax[i]; 2866 c->ilen[i] = a->ilen[i]; 2867 } 2868 2869 /* allocate the matrix space */ 2870 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2871 c->singlemalloc = PETSC_TRUE; 2872 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2873 if (mbs > 0) { 2874 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2875 if (cpvalues == MAT_COPY_VALUES) { 2876 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2877 } else { 2878 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2879 } 2880 } 2881 c->roworiented = a->roworiented; 2882 c->nonew = a->nonew; 2883 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 2884 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 2885 c->bs2 = a->bs2; 2886 c->mbs = a->mbs; 2887 c->nbs = a->nbs; 2888 2889 if (a->diag) { 2890 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2891 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2892 for (i=0; i<mbs; i++) { 2893 c->diag[i] = a->diag[i]; 2894 } 2895 } else c->diag = 0; 2896 c->nz = a->nz; 2897 c->maxnz = a->maxnz; 2898 c->solve_work = 0; 2899 c->mult_work = 0; 2900 c->free_a = PETSC_TRUE; 2901 c->free_ij = PETSC_TRUE; 2902 C->preallocated = PETSC_TRUE; 2903 C->assembled = PETSC_TRUE; 2904 2905 c->compressedrow.use = a->compressedrow.use; 2906 c->compressedrow.nrows = a->compressedrow.nrows; 2907 c->compressedrow.checked = a->compressedrow.checked; 2908 if ( a->compressedrow.checked && a->compressedrow.use){ 2909 i = a->compressedrow.nrows; 2910 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2911 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2912 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2913 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2914 } else { 2915 c->compressedrow.use = PETSC_FALSE; 2916 c->compressedrow.i = PETSC_NULL; 2917 c->compressedrow.rindex = PETSC_NULL; 2918 } 2919 C->same_nonzero = A->same_nonzero; 2920 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2921 PetscFunctionReturn(0); 2922 } 2923 2924 #undef __FUNCT__ 2925 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2926 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2927 { 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 2932 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 2933 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 2934 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues); 2935 PetscFunctionReturn(0); 2936 } 2937 2938 #undef __FUNCT__ 2939 #define __FUNCT__ "MatLoad_SeqBAIJ" 2940 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2941 { 2942 Mat_SeqBAIJ *a; 2943 Mat B; 2944 PetscErrorCode ierr; 2945 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2946 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2947 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2948 PetscInt *masked,nmask,tmp,bs2,ishift; 2949 PetscMPIInt size; 2950 int fd; 2951 PetscScalar *aa; 2952 MPI_Comm comm = ((PetscObject)viewer)->comm; 2953 2954 PetscFunctionBegin; 2955 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2956 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2957 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2958 bs2 = bs*bs; 2959 2960 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2961 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2962 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2963 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2964 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2965 M = header[1]; N = header[2]; nz = header[3]; 2966 2967 if (header[3] < 0) { 2968 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2969 } 2970 2971 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2972 2973 /* 2974 This code adds extra rows to make sure the number of rows is 2975 divisible by the blocksize 2976 */ 2977 mbs = M/bs; 2978 extra_rows = bs - M + bs*(mbs); 2979 if (extra_rows == bs) extra_rows = 0; 2980 else mbs++; 2981 if (extra_rows) { 2982 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2983 } 2984 2985 /* read in row lengths */ 2986 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2987 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2988 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2989 2990 /* read in column indices */ 2991 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2992 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2993 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2994 2995 /* loop over row lengths determining block row lengths */ 2996 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2997 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2998 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2999 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3000 masked = mask + mbs; 3001 rowcount = 0; nzcount = 0; 3002 for (i=0; i<mbs; i++) { 3003 nmask = 0; 3004 for (j=0; j<bs; j++) { 3005 kmax = rowlengths[rowcount]; 3006 for (k=0; k<kmax; k++) { 3007 tmp = jj[nzcount++]/bs; 3008 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3009 } 3010 rowcount++; 3011 } 3012 browlengths[i] += nmask; 3013 /* zero out the mask elements we set */ 3014 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3015 } 3016 3017 /* create our matrix */ 3018 ierr = MatCreate(comm,&B); 3019 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3020 ierr = MatSetType(B,type);CHKERRQ(ierr); 3021 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3022 a = (Mat_SeqBAIJ*)B->data; 3023 3024 /* set matrix "i" values */ 3025 a->i[0] = 0; 3026 for (i=1; i<= mbs; i++) { 3027 a->i[i] = a->i[i-1] + browlengths[i-1]; 3028 a->ilen[i-1] = browlengths[i-1]; 3029 } 3030 a->nz = 0; 3031 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3032 3033 /* read in nonzero values */ 3034 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3035 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3036 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3037 3038 /* set "a" and "j" values into matrix */ 3039 nzcount = 0; jcount = 0; 3040 for (i=0; i<mbs; i++) { 3041 nzcountb = nzcount; 3042 nmask = 0; 3043 for (j=0; j<bs; j++) { 3044 kmax = rowlengths[i*bs+j]; 3045 for (k=0; k<kmax; k++) { 3046 tmp = jj[nzcount++]/bs; 3047 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3048 } 3049 } 3050 /* sort the masked values */ 3051 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3052 3053 /* set "j" values into matrix */ 3054 maskcount = 1; 3055 for (j=0; j<nmask; j++) { 3056 a->j[jcount++] = masked[j]; 3057 mask[masked[j]] = maskcount++; 3058 } 3059 /* set "a" values into matrix */ 3060 ishift = bs2*a->i[i]; 3061 for (j=0; j<bs; j++) { 3062 kmax = rowlengths[i*bs+j]; 3063 for (k=0; k<kmax; k++) { 3064 tmp = jj[nzcountb]/bs ; 3065 block = mask[tmp] - 1; 3066 point = jj[nzcountb] - bs*tmp; 3067 idx = ishift + bs2*block + j + bs*point; 3068 a->a[idx] = (MatScalar)aa[nzcountb++]; 3069 } 3070 } 3071 /* zero out the mask elements we set */ 3072 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3073 } 3074 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3075 3076 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3077 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3078 ierr = PetscFree(aa);CHKERRQ(ierr); 3079 ierr = PetscFree(jj);CHKERRQ(ierr); 3080 ierr = PetscFree(mask);CHKERRQ(ierr); 3081 3082 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3083 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3084 ierr = MatView_Private(B);CHKERRQ(ierr); 3085 3086 *A = B; 3087 PetscFunctionReturn(0); 3088 } 3089 3090 #undef __FUNCT__ 3091 #define __FUNCT__ "MatCreateSeqBAIJ" 3092 /*@C 3093 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3094 compressed row) format. For good matrix assembly performance the 3095 user should preallocate the matrix storage by setting the parameter nz 3096 (or the array nnz). By setting these parameters accurately, performance 3097 during matrix assembly can be increased by more than a factor of 50. 3098 3099 Collective on MPI_Comm 3100 3101 Input Parameters: 3102 + comm - MPI communicator, set to PETSC_COMM_SELF 3103 . bs - size of block 3104 . m - number of rows 3105 . n - number of columns 3106 . nz - number of nonzero blocks per block row (same for all rows) 3107 - nnz - array containing the number of nonzero blocks in the various block rows 3108 (possibly different for each block row) or PETSC_NULL 3109 3110 Output Parameter: 3111 . A - the matrix 3112 3113 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3114 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3115 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3116 3117 Options Database Keys: 3118 . -mat_no_unroll - uses code that does not unroll the loops in the 3119 block calculations (much slower) 3120 . -mat_block_size - size of the blocks to use 3121 3122 Level: intermediate 3123 3124 Notes: 3125 The number of rows and columns must be divisible by blocksize. 3126 3127 If the nnz parameter is given then the nz parameter is ignored 3128 3129 A nonzero block is any block that as 1 or more nonzeros in it 3130 3131 The block AIJ format is fully compatible with standard Fortran 77 3132 storage. That is, the stored row and column indices can begin at 3133 either one (as in Fortran) or zero. See the users' manual for details. 3134 3135 Specify the preallocated storage with either nz or nnz (not both). 3136 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3137 allocation. For additional details, see the users manual chapter on 3138 matrices. 3139 3140 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3141 @*/ 3142 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3143 { 3144 PetscErrorCode ierr; 3145 3146 PetscFunctionBegin; 3147 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3148 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3149 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3150 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3151 PetscFunctionReturn(0); 3152 } 3153 3154 #undef __FUNCT__ 3155 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3156 /*@C 3157 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3158 per row in the matrix. For good matrix assembly performance the 3159 user should preallocate the matrix storage by setting the parameter nz 3160 (or the array nnz). By setting these parameters accurately, performance 3161 during matrix assembly can be increased by more than a factor of 50. 3162 3163 Collective on MPI_Comm 3164 3165 Input Parameters: 3166 + A - the matrix 3167 . bs - size of block 3168 . nz - number of block nonzeros per block row (same for all rows) 3169 - nnz - array containing the number of block nonzeros in the various block rows 3170 (possibly different for each block row) or PETSC_NULL 3171 3172 Options Database Keys: 3173 . -mat_no_unroll - uses code that does not unroll the loops in the 3174 block calculations (much slower) 3175 . -mat_block_size - size of the blocks to use 3176 3177 Level: intermediate 3178 3179 Notes: 3180 If the nnz parameter is given then the nz parameter is ignored 3181 3182 You can call MatGetInfo() to get information on how effective the preallocation was; 3183 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3184 You can also run with the option -info and look for messages with the string 3185 malloc in them to see if additional memory allocation was needed. 3186 3187 The block AIJ format is fully compatible with standard Fortran 77 3188 storage. That is, the stored row and column indices can begin at 3189 either one (as in Fortran) or zero. See the users' manual for details. 3190 3191 Specify the preallocated storage with either nz or nnz (not both). 3192 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3193 allocation. For additional details, see the users manual chapter on 3194 matrices. 3195 3196 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3197 @*/ 3198 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3199 { 3200 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3201 3202 PetscFunctionBegin; 3203 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3204 if (f) { 3205 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3206 } 3207 PetscFunctionReturn(0); 3208 } 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3212 /*@C 3213 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3214 (the default sequential PETSc format). 3215 3216 Collective on MPI_Comm 3217 3218 Input Parameters: 3219 + A - the matrix 3220 . i - the indices into j for the start of each local row (starts with zero) 3221 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3222 - v - optional values in the matrix 3223 3224 Level: developer 3225 3226 .keywords: matrix, aij, compressed row, sparse 3227 3228 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3229 @*/ 3230 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3231 { 3232 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3233 3234 PetscFunctionBegin; 3235 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3236 if (f) { 3237 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3238 } 3239 PetscFunctionReturn(0); 3240 } 3241 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3245 /*@ 3246 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3247 (upper triangular entries in CSR format) provided by the user. 3248 3249 Collective on MPI_Comm 3250 3251 Input Parameters: 3252 + comm - must be an MPI communicator of size 1 3253 . bs - size of block 3254 . m - number of rows 3255 . n - number of columns 3256 . i - row indices 3257 . j - column indices 3258 - a - matrix values 3259 3260 Output Parameter: 3261 . mat - the matrix 3262 3263 Level: intermediate 3264 3265 Notes: 3266 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3267 once the matrix is destroyed 3268 3269 You cannot set new nonzero locations into this matrix, that will generate an error. 3270 3271 The i and j indices are 0 based 3272 3273 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3274 3275 @*/ 3276 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3277 { 3278 PetscErrorCode ierr; 3279 PetscInt ii; 3280 Mat_SeqBAIJ *baij; 3281 3282 PetscFunctionBegin; 3283 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3284 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3285 3286 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3287 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3288 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3289 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3290 baij = (Mat_SeqBAIJ*)(*mat)->data; 3291 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3292 3293 baij->i = i; 3294 baij->j = j; 3295 baij->a = a; 3296 baij->singlemalloc = PETSC_FALSE; 3297 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3298 baij->free_a = PETSC_FALSE; 3299 baij->free_ij = PETSC_FALSE; 3300 3301 for (ii=0; ii<m; ii++) { 3302 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3303 #if defined(PETSC_USE_DEBUG) 3304 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3305 #endif 3306 } 3307 #if defined(PETSC_USE_DEBUG) 3308 for (ii=0; ii<baij->i[m]; ii++) { 3309 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3310 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3311 } 3312 #endif 3313 3314 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3315 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3316 PetscFunctionReturn(0); 3317 } 3318