1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "../src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "../src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 default: 110 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 111 } 112 a->idiagvalid = PETSC_TRUE; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 120 { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 122 PetscScalar *x,x1,s1; 123 const PetscScalar *b; 124 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 125 PetscErrorCode ierr; 126 PetscInt m = a->mbs,i,i2,nz,idx; 127 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 128 129 PetscFunctionBegin; 130 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 131 its = its*lits; 132 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 133 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 134 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 135 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 136 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 137 138 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 139 140 diag = a->diag; 141 idiag = a->idiag; 142 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 143 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 144 145 if (flag & SOR_ZERO_INITIAL_GUESS) { 146 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 147 x[0] = b[0]*idiag[0]; 148 i2 = 1; 149 idiag += 1; 150 for (i=1; i<m; i++) { 151 v = aa + ai[i]; 152 vi = aj + ai[i]; 153 nz = diag[i] - ai[i]; 154 s1 = b[i2]; 155 while (nz--) { 156 idx = (*vi++); 157 x1 = x[idx]; 158 s1 -= v[0]*x1; 159 v += 1; 160 } 161 x[i2] = idiag[0]*s1; 162 idiag += 1; 163 i2 += 1; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; 174 x[i2] = mdiag[0]*x1; 175 mdiag += 1; 176 i2 += 1; 177 } 178 ierr = PetscLogFlops(m);CHKERRQ(ierr); 179 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 180 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 183 idiag = a->idiag+a->mbs - 1; 184 i2 = m - 1; 185 x1 = x[i2]; 186 x[i2] = idiag[0]*x1; 187 idiag -= 1; 188 i2 -= 1; 189 for (i=m-2; i>=0; i--) { 190 v = aa + (diag[i]+1); 191 vi = aj + diag[i] + 1; 192 nz = ai[i+1] - diag[i] - 1; 193 s1 = x[i2]; 194 while (nz--) { 195 idx = (*vi++); 196 x1 = x[idx]; 197 s1 -= v[0]*x1; 198 v += 1; 199 } 200 x[i2] = idiag[0]*s1; 201 idiag -= 2; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else { 207 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 208 } 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 PetscScalar *x,x1,x2,s1,s2; 220 const PetscScalar *b; 221 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 222 PetscErrorCode ierr; 223 PetscInt m = a->mbs,i,i2,nz,idx; 224 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225 226 PetscFunctionBegin; 227 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 228 its = its*lits; 229 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234 235 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 246 i2 = 2; 247 idiag += 4; 248 for (i=1; i<m; i++) { 249 v = aa + 4*ai[i]; 250 vi = aj + ai[i]; 251 nz = diag[i] - ai[i]; 252 s1 = b[i2]; s2 = b[i2+1]; 253 while (nz--) { 254 idx = 2*(*vi++); 255 x1 = x[idx]; x2 = x[1+idx]; 256 s1 -= v[0]*x1 + v[2]*x2; 257 s2 -= v[1]*x1 + v[3]*x2; 258 v += 4; 259 } 260 x[i2] = idiag[0]*s1 + idiag[2]*s2; 261 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 262 idiag += 4; 263 i2 += 2; 264 } 265 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 266 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 267 } 268 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 269 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 270 i2 = 0; 271 mdiag = a->idiag+4*a->mbs; 272 for (i=0; i<m; i++) { 273 x1 = x[i2]; x2 = x[i2+1]; 274 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 275 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 276 mdiag += 4; 277 i2 += 2; 278 } 279 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 280 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 281 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 282 } 283 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 284 idiag = a->idiag+4*a->mbs - 4; 285 i2 = 2*m - 2; 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = idiag[0]*x1 + idiag[2]*x2; 288 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 289 idiag -= 4; 290 i2 -= 2; 291 for (i=m-2; i>=0; i--) { 292 v = aa + 4*(diag[i]+1); 293 vi = aj + diag[i] + 1; 294 nz = ai[i+1] - diag[i] - 1; 295 s1 = x[i2]; s2 = x[i2+1]; 296 while (nz--) { 297 idx = 2*(*vi++); 298 x1 = x[idx]; x2 = x[1+idx]; 299 s1 -= v[0]*x1 + v[2]*x2; 300 s2 -= v[1]*x1 + v[3]*x2; 301 v += 4; 302 } 303 x[i2] = idiag[0]*s1 + idiag[2]*s2; 304 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 305 idiag -= 4; 306 i2 -= 2; 307 } 308 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 312 } 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 420 } 421 } else { 422 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 423 } 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 435 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 436 const PetscScalar *b; 437 PetscErrorCode ierr; 438 PetscInt m = a->mbs,i,i2,nz,idx; 439 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 440 441 PetscFunctionBegin; 442 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 443 its = its*lits; 444 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 445 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 446 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 447 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 448 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 449 450 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 451 452 diag = a->diag; 453 idiag = a->idiag; 454 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 455 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 456 457 if (flag & SOR_ZERO_INITIAL_GUESS) { 458 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 459 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 460 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 461 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 462 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 463 i2 = 4; 464 idiag += 16; 465 for (i=1; i<m; i++) { 466 v = aa + 16*ai[i]; 467 vi = aj + ai[i]; 468 nz = diag[i] - ai[i]; 469 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 473 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 474 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 475 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 476 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 477 v += 16; 478 } 479 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 480 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 481 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 482 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 483 idiag += 16; 484 i2 += 4; 485 } 486 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 487 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 488 } 489 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 490 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 491 i2 = 0; 492 mdiag = a->idiag+16*a->mbs; 493 for (i=0; i<m; i++) { 494 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 495 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 496 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 497 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 498 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 499 mdiag += 16; 500 i2 += 4; 501 } 502 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 503 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 504 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 505 } 506 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 507 idiag = a->idiag+16*a->mbs - 16; 508 i2 = 4*m - 4; 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 510 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 511 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 512 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 513 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 514 idiag -= 16; 515 i2 -= 4; 516 for (i=m-2; i>=0; i--) { 517 v = aa + 16*(diag[i]+1); 518 vi = aj + diag[i] + 1; 519 nz = ai[i+1] - diag[i] - 1; 520 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 521 while (nz--) { 522 idx = 4*(*vi++); 523 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 524 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 525 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 526 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 527 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 528 v += 16; 529 } 530 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 531 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 532 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 533 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 534 idiag -= 16; 535 i2 -= 4; 536 } 537 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 538 } 539 } else { 540 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 541 } 542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 550 { 551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 552 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 553 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 554 const PetscScalar *b; 555 PetscErrorCode ierr; 556 PetscInt m = a->mbs,i,i2,nz,idx; 557 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 558 559 PetscFunctionBegin; 560 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 561 its = its*lits; 562 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 563 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 564 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 565 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 566 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 567 568 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 569 570 diag = a->diag; 571 idiag = a->idiag; 572 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 574 575 if (flag & SOR_ZERO_INITIAL_GUESS) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 577 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 578 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 579 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 580 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 581 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 582 i2 = 5; 583 idiag += 25; 584 for (i=1; i<m; i++) { 585 v = aa + 25*ai[i]; 586 vi = aj + ai[i]; 587 nz = diag[i] - ai[i]; 588 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 589 while (nz--) { 590 idx = 5*(*vi++); 591 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 592 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 593 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 594 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 595 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 596 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 597 v += 25; 598 } 599 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 600 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 601 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 602 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 603 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 604 idiag += 25; 605 i2 += 5; 606 } 607 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 608 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 609 } 610 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 611 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 612 i2 = 0; 613 mdiag = a->idiag+25*a->mbs; 614 for (i=0; i<m; i++) { 615 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 616 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 617 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 618 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 619 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 620 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 621 mdiag += 25; 622 i2 += 5; 623 } 624 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 625 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 626 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 627 } 628 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 629 idiag = a->idiag+25*a->mbs - 25; 630 i2 = 5*m - 5; 631 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 632 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 633 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 634 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 635 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 636 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 637 idiag -= 25; 638 i2 -= 5; 639 for (i=m-2; i>=0; i--) { 640 v = aa + 25*(diag[i]+1); 641 vi = aj + diag[i] + 1; 642 nz = ai[i+1] - diag[i] - 1; 643 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 644 while (nz--) { 645 idx = 5*(*vi++); 646 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 647 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 648 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 649 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 650 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 651 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 652 v += 25; 653 } 654 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 655 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 656 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 657 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 658 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 659 idiag -= 25; 660 i2 -= 5; 661 } 662 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 663 } 664 } else { 665 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 666 } 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 675 { 676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 677 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 678 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 679 const PetscScalar *b; 680 PetscErrorCode ierr; 681 PetscInt m = a->mbs,i,i2,nz,idx; 682 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 683 684 PetscFunctionBegin; 685 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 686 its = its*lits; 687 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 688 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 689 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 690 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 691 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 692 693 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 694 695 diag = a->diag; 696 idiag = a->idiag; 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 699 700 if (flag & SOR_ZERO_INITIAL_GUESS) { 701 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 702 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 703 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 704 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 705 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 706 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 707 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 708 i2 = 6; 709 idiag += 36; 710 for (i=1; i<m; i++) { 711 v = aa + 36*ai[i]; 712 vi = aj + ai[i]; 713 nz = diag[i] - ai[i]; 714 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 715 while (nz--) { 716 idx = 6*(*vi++); 717 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 718 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 719 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 720 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 721 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 722 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 723 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 724 v += 36; 725 } 726 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 727 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 728 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 729 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 730 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 731 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 732 idiag += 36; 733 i2 += 6; 734 } 735 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 736 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 737 } 738 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 739 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 740 i2 = 0; 741 mdiag = a->idiag+36*a->mbs; 742 for (i=0; i<m; i++) { 743 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 744 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 745 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 746 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 747 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 748 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 749 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 750 mdiag += 36; 751 i2 += 6; 752 } 753 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 754 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 755 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 756 } 757 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 758 idiag = a->idiag+36*a->mbs - 36; 759 i2 = 6*m - 6; 760 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 761 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 762 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 763 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 764 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 765 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 766 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 767 idiag -= 36; 768 i2 -= 6; 769 for (i=m-2; i>=0; i--) { 770 v = aa + 36*(diag[i]+1); 771 vi = aj + diag[i] + 1; 772 nz = ai[i+1] - diag[i] - 1; 773 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 774 while (nz--) { 775 idx = 6*(*vi++); 776 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 777 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 778 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 779 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 780 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 781 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 782 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 783 v += 36; 784 } 785 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 786 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 787 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 788 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 789 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 790 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 791 idiag -= 36; 792 i2 -= 6; 793 } 794 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 795 } 796 } else { 797 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 798 } 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 810 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 811 const PetscScalar *b; 812 PetscErrorCode ierr; 813 PetscInt m = a->mbs,i,i2,nz,idx; 814 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 815 816 PetscFunctionBegin; 817 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 818 its = its*lits; 819 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 820 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 821 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 822 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 823 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 824 825 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 826 827 diag = a->diag; 828 idiag = a->idiag; 829 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 830 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 831 832 if (flag & SOR_ZERO_INITIAL_GUESS) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 834 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 835 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 836 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 837 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 838 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 839 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 840 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 841 i2 = 7; 842 idiag += 49; 843 for (i=1; i<m; i++) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = diag[i] - ai[i]; 847 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 848 while (nz--) { 849 idx = 7*(*vi++); 850 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 851 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 852 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 853 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 854 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 855 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 856 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 857 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 858 v += 49; 859 } 860 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 861 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 862 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 863 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 864 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 865 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 866 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 867 idiag += 49; 868 i2 += 7; 869 } 870 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 871 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 872 } 873 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 874 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 875 i2 = 0; 876 mdiag = a->idiag+49*a->mbs; 877 for (i=0; i<m; i++) { 878 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 879 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 880 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 881 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 882 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 883 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 884 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 885 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 886 mdiag += 36; 887 i2 += 6; 888 } 889 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 890 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 891 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 894 idiag = a->idiag+49*a->mbs - 49; 895 i2 = 7*m - 7; 896 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 897 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 898 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 899 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 900 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 901 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 902 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 903 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 904 idiag -= 49; 905 i2 -= 7; 906 for (i=m-2; i>=0; i--) { 907 v = aa + 49*(diag[i]+1); 908 vi = aj + diag[i] + 1; 909 nz = ai[i+1] - diag[i] - 1; 910 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 911 while (nz--) { 912 idx = 7*(*vi++); 913 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 914 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 915 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 916 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 917 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 918 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 919 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 920 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 921 v += 49; 922 } 923 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 924 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 925 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 926 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 927 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 928 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 929 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 930 idiag -= 49; 931 i2 -= 7; 932 } 933 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 934 } 935 } else { 936 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 937 } 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /* 944 Special version for direct calls from Fortran (Used in PETSc-fun3d) 945 */ 946 #if defined(PETSC_HAVE_FORTRAN_CAPS) 947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 949 #define matsetvaluesblocked4_ matsetvaluesblocked4 950 #endif 951 952 EXTERN_C_BEGIN 953 #undef __FUNCT__ 954 #define __FUNCT__ "matsetvaluesblocked4_" 955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 956 { 957 Mat A = *AA; 958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 959 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 960 PetscInt *ai=a->i,*ailen=a->ilen; 961 PetscInt *aj=a->j,stepval,lastcol = -1; 962 const PetscScalar *value = v; 963 MatScalar *ap,*aa = a->a,*bap; 964 965 PetscFunctionBegin; 966 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 967 stepval = (n-1)*4; 968 for (k=0; k<m; k++) { /* loop over added rows */ 969 row = im[k]; 970 rp = aj + ai[row]; 971 ap = aa + 16*ai[row]; 972 nrow = ailen[row]; 973 low = 0; 974 high = nrow; 975 for (l=0; l<n; l++) { /* loop over added columns */ 976 col = in[l]; 977 if (col <= lastcol) low = 0; else high = nrow; 978 lastcol = col; 979 value = v + k*(stepval+4 + l)*4; 980 while (high-low > 7) { 981 t = (low+high)/2; 982 if (rp[t] > col) high = t; 983 else low = t; 984 } 985 for (i=low; i<high; i++) { 986 if (rp[i] > col) break; 987 if (rp[i] == col) { 988 bap = ap + 16*i; 989 for (ii=0; ii<4; ii++,value+=stepval) { 990 for (jj=ii; jj<16; jj+=4) { 991 bap[jj] += *value++; 992 } 993 } 994 goto noinsert2; 995 } 996 } 997 N = nrow++ - 1; 998 high++; /* added new column index thus must search to one higher than before */ 999 /* shift up all the later entries in this row */ 1000 for (ii=N; ii>=i; ii--) { 1001 rp[ii+1] = rp[ii]; 1002 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1003 } 1004 if (N >= i) { 1005 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1006 } 1007 rp[i] = col; 1008 bap = ap + 16*i; 1009 for (ii=0; ii<4; ii++,value+=stepval) { 1010 for (jj=ii; jj<16; jj+=4) { 1011 bap[jj] = *value++; 1012 } 1013 } 1014 noinsert2:; 1015 low = i; 1016 } 1017 ailen[row] = nrow; 1018 } 1019 PetscFunctionReturnVoid(); 1020 } 1021 EXTERN_C_END 1022 1023 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1024 #define matsetvalues4_ MATSETVALUES4 1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1026 #define matsetvalues4_ matsetvalues4 1027 #endif 1028 1029 EXTERN_C_BEGIN 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSetValues4_" 1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1033 { 1034 Mat A = *AA; 1035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1036 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1037 PetscInt *ai=a->i,*ailen=a->ilen; 1038 PetscInt *aj=a->j,brow,bcol; 1039 PetscInt ridx,cidx,lastcol = -1; 1040 MatScalar *ap,value,*aa=a->a,*bap; 1041 1042 PetscFunctionBegin; 1043 for (k=0; k<m; k++) { /* loop over added rows */ 1044 row = im[k]; brow = row/4; 1045 rp = aj + ai[brow]; 1046 ap = aa + 16*ai[brow]; 1047 nrow = ailen[brow]; 1048 low = 0; 1049 high = nrow; 1050 for (l=0; l<n; l++) { /* loop over added columns */ 1051 col = in[l]; bcol = col/4; 1052 ridx = row % 4; cidx = col % 4; 1053 value = v[l + k*n]; 1054 if (col <= lastcol) low = 0; else high = nrow; 1055 lastcol = col; 1056 while (high-low > 7) { 1057 t = (low+high)/2; 1058 if (rp[t] > bcol) high = t; 1059 else low = t; 1060 } 1061 for (i=low; i<high; i++) { 1062 if (rp[i] > bcol) break; 1063 if (rp[i] == bcol) { 1064 bap = ap + 16*i + 4*cidx + ridx; 1065 *bap += value; 1066 goto noinsert1; 1067 } 1068 } 1069 N = nrow++ - 1; 1070 high++; /* added new column thus must search to one higher than before */ 1071 /* shift up all the later entries in this row */ 1072 for (ii=N; ii>=i; ii--) { 1073 rp[ii+1] = rp[ii]; 1074 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1075 } 1076 if (N>=i) { 1077 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1078 } 1079 rp[i] = bcol; 1080 ap[16*i + 4*cidx + ridx] = value; 1081 noinsert1:; 1082 low = i; 1083 } 1084 ailen[brow] = nrow; 1085 } 1086 PetscFunctionReturnVoid(); 1087 } 1088 EXTERN_C_END 1089 1090 /* 1091 Checks for missing diagonals 1092 */ 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscInt *diag,*jj = a->j,i; 1100 1101 PetscFunctionBegin; 1102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1103 *missing = PETSC_FALSE; 1104 diag = a->diag; 1105 for (i=0; i<a->mbs; i++) { 1106 if (jj[diag[i]] != i) { 1107 *missing = PETSC_TRUE; 1108 if (d) *d = i; 1109 } 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscInt i,j,m = a->mbs; 1121 1122 PetscFunctionBegin; 1123 if (!a->diag) { 1124 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1125 } 1126 for (i=0; i<m; i++) { 1127 a->diag[i] = a->i[i+1]; 1128 for (j=a->i[i]; j<a->i[i+1]; j++) { 1129 if (a->j[j] == i) { 1130 a->diag[i] = j; 1131 break; 1132 } 1133 } 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1148 PetscInt *tia, *tja; 1149 1150 PetscFunctionBegin; 1151 *nn = n; 1152 if (!ia) PetscFunctionReturn(0); 1153 if (symmetric) { 1154 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1155 nz = tia[n]; 1156 } else { 1157 tia = a->i; tja = a->j; 1158 } 1159 1160 if (!blockcompressed && bs > 1) { 1161 (*nn) *= bs; 1162 nbs = bs; 1163 /* malloc & create the natural set of indices */ 1164 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1165 if (n) { 1166 (*ia)[0] = 0; 1167 for (j=1; j<bs; j++) { 1168 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1169 } 1170 } 1171 1172 for (i=1; i<n; i++) { 1173 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1174 for (j=1; j<bs; j++) { 1175 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1176 } 1177 } 1178 if (n) { 1179 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1180 } 1181 1182 if (ja) { 1183 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1184 cnt = 0; 1185 for (i=0; i<n; i++) { 1186 for (j=0; j<bs; j++) { 1187 for (k=tia[i]; k<tia[i+1]; k++) { 1188 for (l=0; l<bs; l++) { 1189 (*ja)[cnt++] = bs*tja[k] + l; 1190 } 1191 } 1192 } 1193 } 1194 } 1195 1196 n *= bs; 1197 nz *= bs*bs; 1198 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1199 ierr = PetscFree(tia);CHKERRQ(ierr); 1200 ierr = PetscFree(tja);CHKERRQ(ierr); 1201 } 1202 } else { 1203 *ia = tia; 1204 if (ja) *ja = tja; 1205 } 1206 if (oshift == 1) { 1207 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1208 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1209 } 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1215 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1216 { 1217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1218 PetscErrorCode ierr; 1219 PetscInt i,n = a->mbs,nz = a->i[n]; 1220 1221 PetscFunctionBegin; 1222 if (!ia) PetscFunctionReturn(0); 1223 if (!blockcompressed && A->rmap->bs > 1) { 1224 ierr = PetscFree(*ia);CHKERRQ(ierr); 1225 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1226 } else if (symmetric) { 1227 ierr = PetscFree(*ia);CHKERRQ(ierr); 1228 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1229 } else if (oshift == 1) { /* blockcompressed */ 1230 for (i=0; i<n+1; i++) a->i[i]--; 1231 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1232 } 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1238 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1239 { 1240 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 #if defined(PETSC_USE_LOG) 1245 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1246 #endif 1247 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1248 if (a->row) { 1249 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1250 } 1251 if (a->col) { 1252 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1253 } 1254 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1255 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1256 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1257 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1258 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1259 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1260 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1261 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1262 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1263 1264 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1265 ierr = PetscFree(a);CHKERRQ(ierr); 1266 1267 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1281 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1282 { 1283 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 switch (op) { 1288 case MAT_ROW_ORIENTED: 1289 a->roworiented = flg; 1290 break; 1291 case MAT_KEEP_ZEROED_ROWS: 1292 a->keepzeroedrows = flg; 1293 break; 1294 case MAT_NEW_NONZERO_LOCATIONS: 1295 a->nonew = (flg ? 0 : 1); 1296 break; 1297 case MAT_NEW_NONZERO_LOCATION_ERR: 1298 a->nonew = (flg ? -1 : 0); 1299 break; 1300 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1301 a->nonew = (flg ? -2 : 0); 1302 break; 1303 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1304 a->nounused = (flg ? -1 : 0); 1305 break; 1306 case MAT_NEW_DIAGONALS: 1307 case MAT_IGNORE_OFF_PROC_ENTRIES: 1308 case MAT_USE_HASH_TABLE: 1309 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1310 break; 1311 case MAT_SYMMETRIC: 1312 case MAT_STRUCTURALLY_SYMMETRIC: 1313 case MAT_HERMITIAN: 1314 case MAT_SYMMETRY_ETERNAL: 1315 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1316 break; 1317 default: 1318 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1325 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1326 { 1327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1328 PetscErrorCode ierr; 1329 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1330 MatScalar *aa,*aa_i; 1331 PetscScalar *v_i; 1332 1333 PetscFunctionBegin; 1334 bs = A->rmap->bs; 1335 ai = a->i; 1336 aj = a->j; 1337 aa = a->a; 1338 bs2 = a->bs2; 1339 1340 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1341 1342 bn = row/bs; /* Block number */ 1343 bp = row % bs; /* Block Position */ 1344 M = ai[bn+1] - ai[bn]; 1345 *nz = bs*M; 1346 1347 if (v) { 1348 *v = 0; 1349 if (*nz) { 1350 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1351 for (i=0; i<M; i++) { /* for each block in the block row */ 1352 v_i = *v + i*bs; 1353 aa_i = aa + bs2*(ai[bn] + i); 1354 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1355 } 1356 } 1357 } 1358 1359 if (idx) { 1360 *idx = 0; 1361 if (*nz) { 1362 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1363 for (i=0; i<M; i++) { /* for each block in the block row */ 1364 idx_i = *idx + i*bs; 1365 itmp = bs*aj[ai[bn] + i]; 1366 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1367 } 1368 } 1369 } 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1375 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1376 { 1377 PetscErrorCode ierr; 1378 1379 PetscFunctionBegin; 1380 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1381 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1387 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1388 { 1389 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1390 Mat C; 1391 PetscErrorCode ierr; 1392 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1393 PetscInt *rows,*cols,bs2=a->bs2; 1394 MatScalar *array; 1395 1396 PetscFunctionBegin; 1397 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1398 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1399 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1400 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1401 1402 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1403 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1404 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1405 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1406 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1407 ierr = PetscFree(col);CHKERRQ(ierr); 1408 } else { 1409 C = *B; 1410 } 1411 1412 array = a->a; 1413 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1414 cols = rows + bs; 1415 for (i=0; i<mbs; i++) { 1416 cols[0] = i*bs; 1417 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1418 len = ai[i+1] - ai[i]; 1419 for (j=0; j<len; j++) { 1420 rows[0] = (*aj++)*bs; 1421 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1422 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1423 array += bs2; 1424 } 1425 } 1426 ierr = PetscFree(rows);CHKERRQ(ierr); 1427 1428 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1430 1431 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1432 *B = C; 1433 } else { 1434 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1441 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1442 { 1443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1444 PetscErrorCode ierr; 1445 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1446 int fd; 1447 PetscScalar *aa; 1448 FILE *file; 1449 1450 PetscFunctionBegin; 1451 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1452 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1453 col_lens[0] = MAT_FILE_COOKIE; 1454 1455 col_lens[1] = A->rmap->N; 1456 col_lens[2] = A->cmap->n; 1457 col_lens[3] = a->nz*bs2; 1458 1459 /* store lengths of each row and write (including header) to file */ 1460 count = 0; 1461 for (i=0; i<a->mbs; i++) { 1462 for (j=0; j<bs; j++) { 1463 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1464 } 1465 } 1466 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1467 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1468 1469 /* store column indices (zero start index) */ 1470 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1471 count = 0; 1472 for (i=0; i<a->mbs; i++) { 1473 for (j=0; j<bs; j++) { 1474 for (k=a->i[i]; k<a->i[i+1]; k++) { 1475 for (l=0; l<bs; l++) { 1476 jj[count++] = bs*a->j[k] + l; 1477 } 1478 } 1479 } 1480 } 1481 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1482 ierr = PetscFree(jj);CHKERRQ(ierr); 1483 1484 /* store nonzero values */ 1485 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1486 count = 0; 1487 for (i=0; i<a->mbs; i++) { 1488 for (j=0; j<bs; j++) { 1489 for (k=a->i[i]; k<a->i[i+1]; k++) { 1490 for (l=0; l<bs; l++) { 1491 aa[count++] = a->a[bs2*k + l*bs + j]; 1492 } 1493 } 1494 } 1495 } 1496 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1497 ierr = PetscFree(aa);CHKERRQ(ierr); 1498 1499 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1500 if (file) { 1501 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1502 } 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1508 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1509 { 1510 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1511 PetscErrorCode ierr; 1512 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1513 PetscViewerFormat format; 1514 1515 PetscFunctionBegin; 1516 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1517 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1518 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1519 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1520 Mat aij; 1521 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1522 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1523 ierr = MatDestroy(aij);CHKERRQ(ierr); 1524 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1525 PetscFunctionReturn(0); 1526 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1527 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1528 for (i=0; i<a->mbs; i++) { 1529 for (j=0; j<bs; j++) { 1530 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1531 for (k=a->i[i]; k<a->i[i+1]; k++) { 1532 for (l=0; l<bs; l++) { 1533 #if defined(PETSC_USE_COMPLEX) 1534 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1535 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1536 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1537 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1539 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1542 } 1543 #else 1544 if (a->a[bs2*k + l*bs + j] != 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1546 } 1547 #endif 1548 } 1549 } 1550 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1551 } 1552 } 1553 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1554 } else { 1555 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1556 for (i=0; i<a->mbs; i++) { 1557 for (j=0; j<bs; j++) { 1558 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1559 for (k=a->i[i]; k<a->i[i+1]; k++) { 1560 for (l=0; l<bs; l++) { 1561 #if defined(PETSC_USE_COMPLEX) 1562 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1563 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1564 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1567 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } else { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1570 } 1571 #else 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1573 #endif 1574 } 1575 } 1576 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1577 } 1578 } 1579 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1580 } 1581 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1587 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1588 { 1589 Mat A = (Mat) Aa; 1590 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1591 PetscErrorCode ierr; 1592 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1593 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1594 MatScalar *aa; 1595 PetscViewer viewer; 1596 1597 PetscFunctionBegin; 1598 1599 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1600 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1601 1602 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1603 1604 /* loop over matrix elements drawing boxes */ 1605 color = PETSC_DRAW_BLUE; 1606 for (i=0,row=0; i<mbs; i++,row+=bs) { 1607 for (j=a->i[i]; j<a->i[i+1]; j++) { 1608 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1609 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1610 aa = a->a + j*bs2; 1611 for (k=0; k<bs; k++) { 1612 for (l=0; l<bs; l++) { 1613 if (PetscRealPart(*aa++) >= 0.) continue; 1614 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1615 } 1616 } 1617 } 1618 } 1619 color = PETSC_DRAW_CYAN; 1620 for (i=0,row=0; i<mbs; i++,row+=bs) { 1621 for (j=a->i[i]; j<a->i[i+1]; j++) { 1622 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1623 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1624 aa = a->a + j*bs2; 1625 for (k=0; k<bs; k++) { 1626 for (l=0; l<bs; l++) { 1627 if (PetscRealPart(*aa++) != 0.) continue; 1628 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1629 } 1630 } 1631 } 1632 } 1633 1634 color = PETSC_DRAW_RED; 1635 for (i=0,row=0; i<mbs; i++,row+=bs) { 1636 for (j=a->i[i]; j<a->i[i+1]; j++) { 1637 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1638 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1639 aa = a->a + j*bs2; 1640 for (k=0; k<bs; k++) { 1641 for (l=0; l<bs; l++) { 1642 if (PetscRealPart(*aa++) <= 0.) continue; 1643 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1644 } 1645 } 1646 } 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1653 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1654 { 1655 PetscErrorCode ierr; 1656 PetscReal xl,yl,xr,yr,w,h; 1657 PetscDraw draw; 1658 PetscTruth isnull; 1659 1660 PetscFunctionBegin; 1661 1662 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1663 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1664 1665 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1666 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1667 xr += w; yr += h; xl = -w; yl = -h; 1668 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1669 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1670 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatView_SeqBAIJ" 1676 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1677 { 1678 PetscErrorCode ierr; 1679 PetscTruth iascii,isbinary,isdraw; 1680 1681 PetscFunctionBegin; 1682 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1683 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1684 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1685 if (iascii){ 1686 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1687 } else if (isbinary) { 1688 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1689 } else if (isdraw) { 1690 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1691 } else { 1692 Mat B; 1693 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1694 ierr = MatView(B,viewer);CHKERRQ(ierr); 1695 ierr = MatDestroy(B);CHKERRQ(ierr); 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1703 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1704 { 1705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1706 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1707 PetscInt *ai = a->i,*ailen = a->ilen; 1708 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1709 MatScalar *ap,*aa = a->a; 1710 1711 PetscFunctionBegin; 1712 for (k=0; k<m; k++) { /* loop over rows */ 1713 row = im[k]; brow = row/bs; 1714 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1715 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1716 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1717 nrow = ailen[brow]; 1718 for (l=0; l<n; l++) { /* loop over columns */ 1719 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1720 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1721 col = in[l] ; 1722 bcol = col/bs; 1723 cidx = col%bs; 1724 ridx = row%bs; 1725 high = nrow; 1726 low = 0; /* assume unsorted */ 1727 while (high-low > 5) { 1728 t = (low+high)/2; 1729 if (rp[t] > bcol) high = t; 1730 else low = t; 1731 } 1732 for (i=low; i<high; i++) { 1733 if (rp[i] > bcol) break; 1734 if (rp[i] == bcol) { 1735 *v++ = ap[bs2*i+bs*cidx+ridx]; 1736 goto finished; 1737 } 1738 } 1739 *v++ = 0.0; 1740 finished:; 1741 } 1742 } 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #define CHUNKSIZE 10 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1749 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1750 { 1751 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1752 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1753 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1754 PetscErrorCode ierr; 1755 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1756 PetscTruth roworiented=a->roworiented; 1757 const PetscScalar *value = v; 1758 MatScalar *ap,*aa = a->a,*bap; 1759 1760 PetscFunctionBegin; 1761 if (roworiented) { 1762 stepval = (n-1)*bs; 1763 } else { 1764 stepval = (m-1)*bs; 1765 } 1766 for (k=0; k<m; k++) { /* loop over added rows */ 1767 row = im[k]; 1768 if (row < 0) continue; 1769 #if defined(PETSC_USE_DEBUG) 1770 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1771 #endif 1772 rp = aj + ai[row]; 1773 ap = aa + bs2*ai[row]; 1774 rmax = imax[row]; 1775 nrow = ailen[row]; 1776 low = 0; 1777 high = nrow; 1778 for (l=0; l<n; l++) { /* loop over added columns */ 1779 if (in[l] < 0) continue; 1780 #if defined(PETSC_USE_DEBUG) 1781 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1782 #endif 1783 col = in[l]; 1784 if (roworiented) { 1785 value = v + k*(stepval+bs)*bs + l*bs; 1786 } else { 1787 value = v + l*(stepval+bs)*bs + k*bs; 1788 } 1789 if (col <= lastcol) low = 0; else high = nrow; 1790 lastcol = col; 1791 while (high-low > 7) { 1792 t = (low+high)/2; 1793 if (rp[t] > col) high = t; 1794 else low = t; 1795 } 1796 for (i=low; i<high; i++) { 1797 if (rp[i] > col) break; 1798 if (rp[i] == col) { 1799 bap = ap + bs2*i; 1800 if (roworiented) { 1801 if (is == ADD_VALUES) { 1802 for (ii=0; ii<bs; ii++,value+=stepval) { 1803 for (jj=ii; jj<bs2; jj+=bs) { 1804 bap[jj] += *value++; 1805 } 1806 } 1807 } else { 1808 for (ii=0; ii<bs; ii++,value+=stepval) { 1809 for (jj=ii; jj<bs2; jj+=bs) { 1810 bap[jj] = *value++; 1811 } 1812 } 1813 } 1814 } else { 1815 if (is == ADD_VALUES) { 1816 for (ii=0; ii<bs; ii++,value+=stepval) { 1817 for (jj=0; jj<bs; jj++) { 1818 *bap++ += *value++; 1819 } 1820 } 1821 } else { 1822 for (ii=0; ii<bs; ii++,value+=stepval) { 1823 for (jj=0; jj<bs; jj++) { 1824 *bap++ = *value++; 1825 } 1826 } 1827 } 1828 } 1829 goto noinsert2; 1830 } 1831 } 1832 if (nonew == 1) goto noinsert2; 1833 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1834 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1835 N = nrow++ - 1; high++; 1836 /* shift up all the later entries in this row */ 1837 for (ii=N; ii>=i; ii--) { 1838 rp[ii+1] = rp[ii]; 1839 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1840 } 1841 if (N >= i) { 1842 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1843 } 1844 rp[i] = col; 1845 bap = ap + bs2*i; 1846 if (roworiented) { 1847 for (ii=0; ii<bs; ii++,value+=stepval) { 1848 for (jj=ii; jj<bs2; jj+=bs) { 1849 bap[jj] = *value++; 1850 } 1851 } 1852 } else { 1853 for (ii=0; ii<bs; ii++,value+=stepval) { 1854 for (jj=0; jj<bs; jj++) { 1855 *bap++ = *value++; 1856 } 1857 } 1858 } 1859 noinsert2:; 1860 low = i; 1861 } 1862 ailen[row] = nrow; 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1869 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1870 { 1871 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1872 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1873 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1874 PetscErrorCode ierr; 1875 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1876 MatScalar *aa = a->a,*ap; 1877 PetscReal ratio=0.6; 1878 1879 PetscFunctionBegin; 1880 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1881 1882 if (m) rmax = ailen[0]; 1883 for (i=1; i<mbs; i++) { 1884 /* move each row back by the amount of empty slots (fshift) before it*/ 1885 fshift += imax[i-1] - ailen[i-1]; 1886 rmax = PetscMax(rmax,ailen[i]); 1887 if (fshift) { 1888 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1889 N = ailen[i]; 1890 for (j=0; j<N; j++) { 1891 ip[j-fshift] = ip[j]; 1892 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1893 } 1894 } 1895 ai[i] = ai[i-1] + ailen[i-1]; 1896 } 1897 if (mbs) { 1898 fshift += imax[mbs-1] - ailen[mbs-1]; 1899 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1900 } 1901 /* reset ilen and imax for each row */ 1902 for (i=0; i<mbs; i++) { 1903 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1904 } 1905 a->nz = ai[mbs]; 1906 1907 /* diagonals may have moved, so kill the diagonal pointers */ 1908 a->idiagvalid = PETSC_FALSE; 1909 if (fshift && a->diag) { 1910 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1911 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1912 a->diag = 0; 1913 } 1914 if (fshift && a->nounused == -1) { 1915 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1916 } 1917 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1918 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1919 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1920 a->reallocs = 0; 1921 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1922 1923 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1924 if (a->compressedrow.use){ 1925 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1926 } 1927 1928 A->same_nonzero = PETSC_TRUE; 1929 PetscFunctionReturn(0); 1930 } 1931 1932 /* 1933 This function returns an array of flags which indicate the locations of contiguous 1934 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1935 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1936 Assume: sizes should be long enough to hold all the values. 1937 */ 1938 #undef __FUNCT__ 1939 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1940 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1941 { 1942 PetscInt i,j,k,row; 1943 PetscTruth flg; 1944 1945 PetscFunctionBegin; 1946 for (i=0,j=0; i<n; j++) { 1947 row = idx[i]; 1948 if (row%bs!=0) { /* Not the begining of a block */ 1949 sizes[j] = 1; 1950 i++; 1951 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1952 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1953 i++; 1954 } else { /* Begining of the block, so check if the complete block exists */ 1955 flg = PETSC_TRUE; 1956 for (k=1; k<bs; k++) { 1957 if (row+k != idx[i+k]) { /* break in the block */ 1958 flg = PETSC_FALSE; 1959 break; 1960 } 1961 } 1962 if (flg) { /* No break in the bs */ 1963 sizes[j] = bs; 1964 i+= bs; 1965 } else { 1966 sizes[j] = 1; 1967 i++; 1968 } 1969 } 1970 } 1971 *bs_max = j; 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1977 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1978 { 1979 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1980 PetscErrorCode ierr; 1981 PetscInt i,j,k,count,*rows; 1982 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1983 PetscScalar zero = 0.0; 1984 MatScalar *aa; 1985 1986 PetscFunctionBegin; 1987 /* Make a copy of the IS and sort it */ 1988 /* allocate memory for rows,sizes */ 1989 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1990 sizes = rows + is_n; 1991 1992 /* copy IS values to rows, and sort them */ 1993 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1994 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1995 if (baij->keepzeroedrows) { 1996 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1997 bs_max = is_n; 1998 A->same_nonzero = PETSC_TRUE; 1999 } else { 2000 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2001 A->same_nonzero = PETSC_FALSE; 2002 } 2003 2004 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2005 row = rows[j]; 2006 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2007 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2008 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2009 if (sizes[i] == bs && !baij->keepzeroedrows) { 2010 if (diag != 0.0) { 2011 if (baij->ilen[row/bs] > 0) { 2012 baij->ilen[row/bs] = 1; 2013 baij->j[baij->i[row/bs]] = row/bs; 2014 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2015 } 2016 /* Now insert all the diagonal values for this bs */ 2017 for (k=0; k<bs; k++) { 2018 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2019 } 2020 } else { /* (diag == 0.0) */ 2021 baij->ilen[row/bs] = 0; 2022 } /* end (diag == 0.0) */ 2023 } else { /* (sizes[i] != bs) */ 2024 #if defined (PETSC_USE_DEBUG) 2025 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2026 #endif 2027 for (k=0; k<count; k++) { 2028 aa[0] = zero; 2029 aa += bs; 2030 } 2031 if (diag != 0.0) { 2032 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2033 } 2034 } 2035 } 2036 2037 ierr = PetscFree(rows);CHKERRQ(ierr); 2038 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2044 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2045 { 2046 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2047 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2048 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2049 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2050 PetscErrorCode ierr; 2051 PetscInt ridx,cidx,bs2=a->bs2; 2052 PetscTruth roworiented=a->roworiented; 2053 MatScalar *ap,value,*aa=a->a,*bap; 2054 2055 PetscFunctionBegin; 2056 for (k=0; k<m; k++) { /* loop over added rows */ 2057 row = im[k]; 2058 brow = row/bs; 2059 if (row < 0) continue; 2060 #if defined(PETSC_USE_DEBUG) 2061 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2062 #endif 2063 rp = aj + ai[brow]; 2064 ap = aa + bs2*ai[brow]; 2065 rmax = imax[brow]; 2066 nrow = ailen[brow]; 2067 low = 0; 2068 high = nrow; 2069 for (l=0; l<n; l++) { /* loop over added columns */ 2070 if (in[l] < 0) continue; 2071 #if defined(PETSC_USE_DEBUG) 2072 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2073 #endif 2074 col = in[l]; bcol = col/bs; 2075 ridx = row % bs; cidx = col % bs; 2076 if (roworiented) { 2077 value = v[l + k*n]; 2078 } else { 2079 value = v[k + l*m]; 2080 } 2081 if (col <= lastcol) low = 0; else high = nrow; 2082 lastcol = col; 2083 while (high-low > 7) { 2084 t = (low+high)/2; 2085 if (rp[t] > bcol) high = t; 2086 else low = t; 2087 } 2088 for (i=low; i<high; i++) { 2089 if (rp[i] > bcol) break; 2090 if (rp[i] == bcol) { 2091 bap = ap + bs2*i + bs*cidx + ridx; 2092 if (is == ADD_VALUES) *bap += value; 2093 else *bap = value; 2094 goto noinsert1; 2095 } 2096 } 2097 if (nonew == 1) goto noinsert1; 2098 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2099 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2100 N = nrow++ - 1; high++; 2101 /* shift up all the later entries in this row */ 2102 for (ii=N; ii>=i; ii--) { 2103 rp[ii+1] = rp[ii]; 2104 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2105 } 2106 if (N>=i) { 2107 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2108 } 2109 rp[i] = bcol; 2110 ap[bs2*i + bs*cidx + ridx] = value; 2111 a->nz++; 2112 noinsert1:; 2113 low = i; 2114 } 2115 ailen[brow] = nrow; 2116 } 2117 A->same_nonzero = PETSC_FALSE; 2118 PetscFunctionReturn(0); 2119 } 2120 2121 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2125 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2126 { 2127 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2128 Mat outA; 2129 PetscErrorCode ierr; 2130 PetscTruth row_identity,col_identity; 2131 2132 PetscFunctionBegin; 2133 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2134 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2135 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2136 if (!row_identity || !col_identity) { 2137 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2138 } 2139 2140 outA = inA; 2141 inA->factor = MAT_FACTOR_LU; 2142 2143 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2144 2145 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2146 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2147 a->row = row; 2148 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2149 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2150 a->col = col; 2151 2152 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2153 if (a->icol) { 2154 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2155 } 2156 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2157 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2158 2159 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2160 if (!a->solve_work) { 2161 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2162 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2163 } 2164 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2165 2166 PetscFunctionReturn(0); 2167 } 2168 2169 EXTERN_C_BEGIN 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2172 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2173 { 2174 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2175 PetscInt i,nz,nbs; 2176 2177 PetscFunctionBegin; 2178 nz = baij->maxnz/baij->bs2; 2179 nbs = baij->nbs; 2180 for (i=0; i<nz; i++) { 2181 baij->j[i] = indices[i]; 2182 } 2183 baij->nz = nz; 2184 for (i=0; i<nbs; i++) { 2185 baij->ilen[i] = baij->imax[i]; 2186 } 2187 2188 PetscFunctionReturn(0); 2189 } 2190 EXTERN_C_END 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2194 /*@ 2195 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2196 in the matrix. 2197 2198 Input Parameters: 2199 + mat - the SeqBAIJ matrix 2200 - indices - the column indices 2201 2202 Level: advanced 2203 2204 Notes: 2205 This can be called if you have precomputed the nonzero structure of the 2206 matrix and want to provide it to the matrix object to improve the performance 2207 of the MatSetValues() operation. 2208 2209 You MUST have set the correct numbers of nonzeros per row in the call to 2210 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2211 2212 MUST be called before any calls to MatSetValues(); 2213 2214 @*/ 2215 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2216 { 2217 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2218 2219 PetscFunctionBegin; 2220 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2221 PetscValidPointer(indices,2); 2222 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2223 if (f) { 2224 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2225 } else { 2226 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2233 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2234 { 2235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2236 PetscErrorCode ierr; 2237 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2238 PetscReal atmp; 2239 PetscScalar *x,zero = 0.0; 2240 MatScalar *aa; 2241 PetscInt ncols,brow,krow,kcol; 2242 2243 PetscFunctionBegin; 2244 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2245 bs = A->rmap->bs; 2246 aa = a->a; 2247 ai = a->i; 2248 aj = a->j; 2249 mbs = a->mbs; 2250 2251 ierr = VecSet(v,zero);CHKERRQ(ierr); 2252 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2253 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2254 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2255 for (i=0; i<mbs; i++) { 2256 ncols = ai[1] - ai[0]; ai++; 2257 brow = bs*i; 2258 for (j=0; j<ncols; j++){ 2259 for (kcol=0; kcol<bs; kcol++){ 2260 for (krow=0; krow<bs; krow++){ 2261 atmp = PetscAbsScalar(*aa);aa++; 2262 row = brow + krow; /* row index */ 2263 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2264 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2265 } 2266 } 2267 aj++; 2268 } 2269 } 2270 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 #undef __FUNCT__ 2275 #define __FUNCT__ "MatCopy_SeqBAIJ" 2276 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2277 { 2278 PetscErrorCode ierr; 2279 2280 PetscFunctionBegin; 2281 /* If the two matrices have the same copy implementation, use fast copy. */ 2282 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2283 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2284 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2285 2286 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2287 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2288 } 2289 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2290 } else { 2291 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2292 } 2293 PetscFunctionReturn(0); 2294 } 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2298 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2299 { 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2309 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2310 { 2311 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2312 PetscFunctionBegin; 2313 *array = a->a; 2314 PetscFunctionReturn(0); 2315 } 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2319 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2320 { 2321 PetscFunctionBegin; 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #include "petscblaslapack.h" 2326 #undef __FUNCT__ 2327 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2328 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2329 { 2330 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2331 PetscErrorCode ierr; 2332 PetscInt i,bs=Y->rmap->bs,j,bs2; 2333 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2334 2335 PetscFunctionBegin; 2336 if (str == SAME_NONZERO_PATTERN) { 2337 PetscScalar alpha = a; 2338 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2339 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2340 if (y->xtoy && y->XtoY != X) { 2341 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2342 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2343 } 2344 if (!y->xtoy) { /* get xtoy */ 2345 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2346 y->XtoY = X; 2347 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2348 } 2349 bs2 = bs*bs; 2350 for (i=0; i<x->nz; i++) { 2351 j = 0; 2352 while (j < bs2){ 2353 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2354 j++; 2355 } 2356 } 2357 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2358 } else { 2359 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2360 } 2361 PetscFunctionReturn(0); 2362 } 2363 2364 #undef __FUNCT__ 2365 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2366 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2367 { 2368 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2369 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2370 MatScalar *aa = a->a; 2371 2372 PetscFunctionBegin; 2373 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2379 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2380 { 2381 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2382 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2383 MatScalar *aa = a->a; 2384 2385 PetscFunctionBegin; 2386 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 2391 /* -------------------------------------------------------------------*/ 2392 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2393 MatGetRow_SeqBAIJ, 2394 MatRestoreRow_SeqBAIJ, 2395 MatMult_SeqBAIJ_N, 2396 /* 4*/ MatMultAdd_SeqBAIJ_N, 2397 MatMultTranspose_SeqBAIJ, 2398 MatMultTransposeAdd_SeqBAIJ, 2399 0, 2400 0, 2401 0, 2402 /*10*/ 0, 2403 MatLUFactor_SeqBAIJ, 2404 0, 2405 0, 2406 MatTranspose_SeqBAIJ, 2407 /*15*/ MatGetInfo_SeqBAIJ, 2408 MatEqual_SeqBAIJ, 2409 MatGetDiagonal_SeqBAIJ, 2410 MatDiagonalScale_SeqBAIJ, 2411 MatNorm_SeqBAIJ, 2412 /*20*/ 0, 2413 MatAssemblyEnd_SeqBAIJ, 2414 0, 2415 MatSetOption_SeqBAIJ, 2416 MatZeroEntries_SeqBAIJ, 2417 /*25*/ MatZeroRows_SeqBAIJ, 2418 0, 2419 0, 2420 0, 2421 0, 2422 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2423 0, 2424 0, 2425 MatGetArray_SeqBAIJ, 2426 MatRestoreArray_SeqBAIJ, 2427 /*35*/ MatDuplicate_SeqBAIJ, 2428 0, 2429 0, 2430 MatILUFactor_SeqBAIJ, 2431 0, 2432 /*40*/ MatAXPY_SeqBAIJ, 2433 MatGetSubMatrices_SeqBAIJ, 2434 MatIncreaseOverlap_SeqBAIJ, 2435 MatGetValues_SeqBAIJ, 2436 MatCopy_SeqBAIJ, 2437 /*45*/ 0, 2438 MatScale_SeqBAIJ, 2439 0, 2440 0, 2441 0, 2442 /*50*/ 0, 2443 MatGetRowIJ_SeqBAIJ, 2444 MatRestoreRowIJ_SeqBAIJ, 2445 0, 2446 0, 2447 /*55*/ 0, 2448 0, 2449 0, 2450 0, 2451 MatSetValuesBlocked_SeqBAIJ, 2452 /*60*/ MatGetSubMatrix_SeqBAIJ, 2453 MatDestroy_SeqBAIJ, 2454 MatView_SeqBAIJ, 2455 0, 2456 0, 2457 /*65*/ 0, 2458 0, 2459 0, 2460 0, 2461 0, 2462 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2463 0, 2464 MatConvert_Basic, 2465 0, 2466 0, 2467 /*75*/ 0, 2468 0, 2469 0, 2470 0, 2471 0, 2472 /*80*/ 0, 2473 0, 2474 0, 2475 0, 2476 MatLoad_SeqBAIJ, 2477 /*85*/ 0, 2478 0, 2479 0, 2480 0, 2481 0, 2482 /*90*/ 0, 2483 0, 2484 0, 2485 0, 2486 0, 2487 /*95*/ 0, 2488 0, 2489 0, 2490 0, 2491 0, 2492 /*100*/0, 2493 0, 2494 0, 2495 0, 2496 0, 2497 /*105*/0, 2498 MatRealPart_SeqBAIJ, 2499 MatImaginaryPart_SeqBAIJ, 2500 0, 2501 0, 2502 /*110*/0, 2503 0, 2504 0, 2505 0, 2506 MatMissingDiagonal_SeqBAIJ 2507 /*115*/ 2508 }; 2509 2510 EXTERN_C_BEGIN 2511 #undef __FUNCT__ 2512 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2513 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2514 { 2515 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2516 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2517 PetscErrorCode ierr; 2518 2519 PetscFunctionBegin; 2520 if (aij->nonew != 1) { 2521 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2522 } 2523 2524 /* allocate space for values if not already there */ 2525 if (!aij->saved_values) { 2526 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2527 } 2528 2529 /* copy values over */ 2530 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 EXTERN_C_END 2534 2535 EXTERN_C_BEGIN 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2538 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2539 { 2540 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2541 PetscErrorCode ierr; 2542 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2543 2544 PetscFunctionBegin; 2545 if (aij->nonew != 1) { 2546 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2547 } 2548 if (!aij->saved_values) { 2549 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2550 } 2551 2552 /* copy values over */ 2553 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 EXTERN_C_END 2557 2558 EXTERN_C_BEGIN 2559 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2560 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2561 EXTERN_C_END 2562 2563 EXTERN_C_BEGIN 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2566 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2567 { 2568 Mat_SeqBAIJ *b; 2569 PetscErrorCode ierr; 2570 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2571 PetscTruth flg,skipallocation = PETSC_FALSE; 2572 2573 PetscFunctionBegin; 2574 2575 if (nz == MAT_SKIP_ALLOCATION) { 2576 skipallocation = PETSC_TRUE; 2577 nz = 0; 2578 } 2579 2580 if (bs < 0) { 2581 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2582 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2583 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2584 bs = PetscAbs(bs); 2585 } 2586 if (nnz && newbs != bs) { 2587 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2588 } 2589 bs = newbs; 2590 2591 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2592 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2593 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2594 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2595 2596 B->preallocated = PETSC_TRUE; 2597 2598 mbs = B->rmap->n/bs; 2599 nbs = B->cmap->n/bs; 2600 bs2 = bs*bs; 2601 2602 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2603 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2604 } 2605 2606 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2607 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2608 if (nnz) { 2609 for (i=0; i<mbs; i++) { 2610 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2611 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2612 } 2613 } 2614 2615 b = (Mat_SeqBAIJ*)B->data; 2616 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2617 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2618 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2619 2620 if (!flg) { 2621 switch (bs) { 2622 case 1: 2623 B->ops->mult = MatMult_SeqBAIJ_1; 2624 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2625 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2626 break; 2627 case 2: 2628 B->ops->mult = MatMult_SeqBAIJ_2; 2629 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2630 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2631 break; 2632 case 3: 2633 B->ops->mult = MatMult_SeqBAIJ_3; 2634 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2635 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2636 break; 2637 case 4: 2638 B->ops->mult = MatMult_SeqBAIJ_4; 2639 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2640 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2641 break; 2642 case 5: 2643 B->ops->mult = MatMult_SeqBAIJ_5; 2644 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2645 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2646 break; 2647 case 6: 2648 B->ops->mult = MatMult_SeqBAIJ_6; 2649 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2650 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2651 break; 2652 case 7: 2653 B->ops->mult = MatMult_SeqBAIJ_7; 2654 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2655 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2656 break; 2657 default: 2658 B->ops->mult = MatMult_SeqBAIJ_N; 2659 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2660 break; 2661 } 2662 } 2663 B->rmap->bs = bs; 2664 b->mbs = mbs; 2665 b->nbs = nbs; 2666 if (!skipallocation) { 2667 if (!b->imax) { 2668 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2669 } 2670 /* b->ilen will count nonzeros in each block row so far. */ 2671 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2672 if (!nnz) { 2673 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2674 else if (nz <= 0) nz = 1; 2675 for (i=0; i<mbs; i++) b->imax[i] = nz; 2676 nz = nz*mbs; 2677 } else { 2678 nz = 0; 2679 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2680 } 2681 2682 /* allocate the matrix space */ 2683 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2684 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2685 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2686 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2687 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2688 b->singlemalloc = PETSC_TRUE; 2689 b->i[0] = 0; 2690 for (i=1; i<mbs+1; i++) { 2691 b->i[i] = b->i[i-1] + b->imax[i-1]; 2692 } 2693 b->free_a = PETSC_TRUE; 2694 b->free_ij = PETSC_TRUE; 2695 } else { 2696 b->free_a = PETSC_FALSE; 2697 b->free_ij = PETSC_FALSE; 2698 } 2699 2700 B->rmap->bs = bs; 2701 b->bs2 = bs2; 2702 b->mbs = mbs; 2703 b->nz = 0; 2704 b->maxnz = nz*bs2; 2705 B->info.nz_unneeded = (PetscReal)b->maxnz; 2706 PetscFunctionReturn(0); 2707 } 2708 EXTERN_C_END 2709 2710 EXTERN_C_BEGIN 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2713 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2714 { 2715 PetscInt i,m,nz,nz_max=0,*nnz; 2716 PetscScalar *values=0; 2717 PetscErrorCode ierr; 2718 2719 PetscFunctionBegin; 2720 2721 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2722 2723 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2724 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2725 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2726 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2727 m = B->rmap->n/bs; 2728 2729 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 2730 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 2731 for(i=0; i<m; i++) { 2732 nz = ii[i+1]- ii[i]; 2733 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 2734 nz_max = PetscMax(nz_max, nz); 2735 nnz[i] = nz; 2736 } 2737 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2738 ierr = PetscFree(nnz);CHKERRQ(ierr); 2739 2740 values = (PetscScalar*)V; 2741 if (!values) { 2742 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2743 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2744 } 2745 for (i=0; i<m; i++) { 2746 PetscInt ncols = ii[i+1] - ii[i]; 2747 const PetscInt *icols = jj + ii[i]; 2748 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2749 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2750 } 2751 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2752 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2753 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 2755 PetscFunctionReturn(0); 2756 } 2757 EXTERN_C_END 2758 2759 2760 EXTERN_C_BEGIN 2761 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2762 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 2763 EXTERN_C_END 2764 2765 /*MC 2766 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2767 block sparse compressed row format. 2768 2769 Options Database Keys: 2770 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2771 2772 Level: beginner 2773 2774 .seealso: MatCreateSeqBAIJ() 2775 M*/ 2776 2777 2778 EXTERN_C_BEGIN 2779 #undef __FUNCT__ 2780 #define __FUNCT__ "MatCreate_SeqBAIJ" 2781 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2782 { 2783 PetscErrorCode ierr; 2784 PetscMPIInt size; 2785 Mat_SeqBAIJ *b; 2786 2787 PetscFunctionBegin; 2788 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2789 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2790 2791 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2792 B->data = (void*)b; 2793 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2794 B->mapping = 0; 2795 b->row = 0; 2796 b->col = 0; 2797 b->icol = 0; 2798 b->reallocs = 0; 2799 b->saved_values = 0; 2800 2801 b->roworiented = PETSC_TRUE; 2802 b->nonew = 0; 2803 b->diag = 0; 2804 b->solve_work = 0; 2805 b->mult_work = 0; 2806 B->spptr = 0; 2807 B->info.nz_unneeded = (PetscReal)b->maxnz; 2808 b->keepzeroedrows = PETSC_FALSE; 2809 b->xtoy = 0; 2810 b->XtoY = 0; 2811 b->compressedrow.use = PETSC_FALSE; 2812 b->compressedrow.nrows = 0; 2813 b->compressedrow.i = PETSC_NULL; 2814 b->compressedrow.rindex = PETSC_NULL; 2815 b->compressedrow.checked = PETSC_FALSE; 2816 B->same_nonzero = PETSC_FALSE; 2817 2818 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 2819 "MatGetFactorAvailable_seqbaij_petsc", 2820 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 2821 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 2822 "MatGetFactor_seqbaij_petsc", 2823 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 2824 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2825 "MatInvertBlockDiagonal_SeqBAIJ", 2826 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2828 "MatStoreValues_SeqBAIJ", 2829 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2830 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2831 "MatRetrieveValues_SeqBAIJ", 2832 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2833 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2834 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2835 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2836 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2837 "MatConvert_SeqBAIJ_SeqAIJ", 2838 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2839 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2840 "MatConvert_SeqBAIJ_SeqSBAIJ", 2841 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2842 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2843 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2844 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2845 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 2846 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 2847 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2848 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2849 PetscFunctionReturn(0); 2850 } 2851 EXTERN_C_END 2852 2853 #undef __FUNCT__ 2854 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 2855 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues) 2856 { 2857 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2858 PetscErrorCode ierr; 2859 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2860 2861 PetscFunctionBegin; 2862 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2863 2864 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2865 for (i=0; i<mbs; i++) { 2866 c->imax[i] = a->imax[i]; 2867 c->ilen[i] = a->ilen[i]; 2868 } 2869 2870 /* allocate the matrix space */ 2871 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2872 c->singlemalloc = PETSC_TRUE; 2873 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2874 if (mbs > 0) { 2875 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2876 if (cpvalues == MAT_COPY_VALUES) { 2877 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2878 } else { 2879 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2880 } 2881 } 2882 c->roworiented = a->roworiented; 2883 c->nonew = a->nonew; 2884 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 2885 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 2886 c->bs2 = a->bs2; 2887 c->mbs = a->mbs; 2888 c->nbs = a->nbs; 2889 2890 if (a->diag) { 2891 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2892 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2893 for (i=0; i<mbs; i++) { 2894 c->diag[i] = a->diag[i]; 2895 } 2896 } else c->diag = 0; 2897 c->nz = a->nz; 2898 c->maxnz = a->maxnz; 2899 c->solve_work = 0; 2900 c->mult_work = 0; 2901 c->free_a = PETSC_TRUE; 2902 c->free_ij = PETSC_TRUE; 2903 C->preallocated = PETSC_TRUE; 2904 C->assembled = PETSC_TRUE; 2905 2906 c->compressedrow.use = a->compressedrow.use; 2907 c->compressedrow.nrows = a->compressedrow.nrows; 2908 c->compressedrow.checked = a->compressedrow.checked; 2909 if ( a->compressedrow.checked && a->compressedrow.use){ 2910 i = a->compressedrow.nrows; 2911 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2912 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2913 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2914 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2915 } else { 2916 c->compressedrow.use = PETSC_FALSE; 2917 c->compressedrow.i = PETSC_NULL; 2918 c->compressedrow.rindex = PETSC_NULL; 2919 } 2920 C->same_nonzero = A->same_nonzero; 2921 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2922 PetscFunctionReturn(0); 2923 } 2924 2925 #undef __FUNCT__ 2926 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2927 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2928 { 2929 PetscErrorCode ierr; 2930 2931 PetscFunctionBegin; 2932 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 2933 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 2934 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 2935 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues); 2936 PetscFunctionReturn(0); 2937 } 2938 2939 #undef __FUNCT__ 2940 #define __FUNCT__ "MatLoad_SeqBAIJ" 2941 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2942 { 2943 Mat_SeqBAIJ *a; 2944 Mat B; 2945 PetscErrorCode ierr; 2946 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2947 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2948 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2949 PetscInt *masked,nmask,tmp,bs2,ishift; 2950 PetscMPIInt size; 2951 int fd; 2952 PetscScalar *aa; 2953 MPI_Comm comm = ((PetscObject)viewer)->comm; 2954 2955 PetscFunctionBegin; 2956 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2957 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2958 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2959 bs2 = bs*bs; 2960 2961 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2962 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2963 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2964 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2965 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2966 M = header[1]; N = header[2]; nz = header[3]; 2967 2968 if (header[3] < 0) { 2969 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2970 } 2971 2972 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2973 2974 /* 2975 This code adds extra rows to make sure the number of rows is 2976 divisible by the blocksize 2977 */ 2978 mbs = M/bs; 2979 extra_rows = bs - M + bs*(mbs); 2980 if (extra_rows == bs) extra_rows = 0; 2981 else mbs++; 2982 if (extra_rows) { 2983 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2984 } 2985 2986 /* read in row lengths */ 2987 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2988 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2989 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2990 2991 /* read in column indices */ 2992 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2993 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2994 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2995 2996 /* loop over row lengths determining block row lengths */ 2997 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2998 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2999 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3000 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3001 masked = mask + mbs; 3002 rowcount = 0; nzcount = 0; 3003 for (i=0; i<mbs; i++) { 3004 nmask = 0; 3005 for (j=0; j<bs; j++) { 3006 kmax = rowlengths[rowcount]; 3007 for (k=0; k<kmax; k++) { 3008 tmp = jj[nzcount++]/bs; 3009 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3010 } 3011 rowcount++; 3012 } 3013 browlengths[i] += nmask; 3014 /* zero out the mask elements we set */ 3015 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3016 } 3017 3018 /* create our matrix */ 3019 ierr = MatCreate(comm,&B); 3020 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3021 ierr = MatSetType(B,type);CHKERRQ(ierr); 3022 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3023 a = (Mat_SeqBAIJ*)B->data; 3024 3025 /* set matrix "i" values */ 3026 a->i[0] = 0; 3027 for (i=1; i<= mbs; i++) { 3028 a->i[i] = a->i[i-1] + browlengths[i-1]; 3029 a->ilen[i-1] = browlengths[i-1]; 3030 } 3031 a->nz = 0; 3032 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3033 3034 /* read in nonzero values */ 3035 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3036 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3037 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3038 3039 /* set "a" and "j" values into matrix */ 3040 nzcount = 0; jcount = 0; 3041 for (i=0; i<mbs; i++) { 3042 nzcountb = nzcount; 3043 nmask = 0; 3044 for (j=0; j<bs; j++) { 3045 kmax = rowlengths[i*bs+j]; 3046 for (k=0; k<kmax; k++) { 3047 tmp = jj[nzcount++]/bs; 3048 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3049 } 3050 } 3051 /* sort the masked values */ 3052 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3053 3054 /* set "j" values into matrix */ 3055 maskcount = 1; 3056 for (j=0; j<nmask; j++) { 3057 a->j[jcount++] = masked[j]; 3058 mask[masked[j]] = maskcount++; 3059 } 3060 /* set "a" values into matrix */ 3061 ishift = bs2*a->i[i]; 3062 for (j=0; j<bs; j++) { 3063 kmax = rowlengths[i*bs+j]; 3064 for (k=0; k<kmax; k++) { 3065 tmp = jj[nzcountb]/bs ; 3066 block = mask[tmp] - 1; 3067 point = jj[nzcountb] - bs*tmp; 3068 idx = ishift + bs2*block + j + bs*point; 3069 a->a[idx] = (MatScalar)aa[nzcountb++]; 3070 } 3071 } 3072 /* zero out the mask elements we set */ 3073 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3074 } 3075 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3076 3077 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3078 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3079 ierr = PetscFree(aa);CHKERRQ(ierr); 3080 ierr = PetscFree(jj);CHKERRQ(ierr); 3081 ierr = PetscFree(mask);CHKERRQ(ierr); 3082 3083 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3084 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3085 ierr = MatView_Private(B);CHKERRQ(ierr); 3086 3087 *A = B; 3088 PetscFunctionReturn(0); 3089 } 3090 3091 #undef __FUNCT__ 3092 #define __FUNCT__ "MatCreateSeqBAIJ" 3093 /*@C 3094 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3095 compressed row) format. For good matrix assembly performance the 3096 user should preallocate the matrix storage by setting the parameter nz 3097 (or the array nnz). By setting these parameters accurately, performance 3098 during matrix assembly can be increased by more than a factor of 50. 3099 3100 Collective on MPI_Comm 3101 3102 Input Parameters: 3103 + comm - MPI communicator, set to PETSC_COMM_SELF 3104 . bs - size of block 3105 . m - number of rows 3106 . n - number of columns 3107 . nz - number of nonzero blocks per block row (same for all rows) 3108 - nnz - array containing the number of nonzero blocks in the various block rows 3109 (possibly different for each block row) or PETSC_NULL 3110 3111 Output Parameter: 3112 . A - the matrix 3113 3114 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3115 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3116 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3117 3118 Options Database Keys: 3119 . -mat_no_unroll - uses code that does not unroll the loops in the 3120 block calculations (much slower) 3121 . -mat_block_size - size of the blocks to use 3122 3123 Level: intermediate 3124 3125 Notes: 3126 The number of rows and columns must be divisible by blocksize. 3127 3128 If the nnz parameter is given then the nz parameter is ignored 3129 3130 A nonzero block is any block that as 1 or more nonzeros in it 3131 3132 The block AIJ format is fully compatible with standard Fortran 77 3133 storage. That is, the stored row and column indices can begin at 3134 either one (as in Fortran) or zero. See the users' manual for details. 3135 3136 Specify the preallocated storage with either nz or nnz (not both). 3137 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3138 allocation. For additional details, see the users manual chapter on 3139 matrices. 3140 3141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3142 @*/ 3143 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3144 { 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3149 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3150 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3151 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3157 /*@C 3158 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3159 per row in the matrix. For good matrix assembly performance the 3160 user should preallocate the matrix storage by setting the parameter nz 3161 (or the array nnz). By setting these parameters accurately, performance 3162 during matrix assembly can be increased by more than a factor of 50. 3163 3164 Collective on MPI_Comm 3165 3166 Input Parameters: 3167 + A - the matrix 3168 . bs - size of block 3169 . nz - number of block nonzeros per block row (same for all rows) 3170 - nnz - array containing the number of block nonzeros in the various block rows 3171 (possibly different for each block row) or PETSC_NULL 3172 3173 Options Database Keys: 3174 . -mat_no_unroll - uses code that does not unroll the loops in the 3175 block calculations (much slower) 3176 . -mat_block_size - size of the blocks to use 3177 3178 Level: intermediate 3179 3180 Notes: 3181 If the nnz parameter is given then the nz parameter is ignored 3182 3183 You can call MatGetInfo() to get information on how effective the preallocation was; 3184 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3185 You can also run with the option -info and look for messages with the string 3186 malloc in them to see if additional memory allocation was needed. 3187 3188 The block AIJ format is fully compatible with standard Fortran 77 3189 storage. That is, the stored row and column indices can begin at 3190 either one (as in Fortran) or zero. See the users' manual for details. 3191 3192 Specify the preallocated storage with either nz or nnz (not both). 3193 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3194 allocation. For additional details, see the users manual chapter on 3195 matrices. 3196 3197 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3198 @*/ 3199 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3200 { 3201 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3202 3203 PetscFunctionBegin; 3204 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3205 if (f) { 3206 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3207 } 3208 PetscFunctionReturn(0); 3209 } 3210 3211 #undef __FUNCT__ 3212 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3213 /*@C 3214 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3215 (the default sequential PETSc format). 3216 3217 Collective on MPI_Comm 3218 3219 Input Parameters: 3220 + A - the matrix 3221 . i - the indices into j for the start of each local row (starts with zero) 3222 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3223 - v - optional values in the matrix 3224 3225 Level: developer 3226 3227 .keywords: matrix, aij, compressed row, sparse 3228 3229 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3230 @*/ 3231 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3232 { 3233 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3234 3235 PetscFunctionBegin; 3236 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3237 if (f) { 3238 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3239 } 3240 PetscFunctionReturn(0); 3241 } 3242 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3246 /*@ 3247 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3248 (upper triangular entries in CSR format) provided by the user. 3249 3250 Collective on MPI_Comm 3251 3252 Input Parameters: 3253 + comm - must be an MPI communicator of size 1 3254 . bs - size of block 3255 . m - number of rows 3256 . n - number of columns 3257 . i - row indices 3258 . j - column indices 3259 - a - matrix values 3260 3261 Output Parameter: 3262 . mat - the matrix 3263 3264 Level: intermediate 3265 3266 Notes: 3267 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3268 once the matrix is destroyed 3269 3270 You cannot set new nonzero locations into this matrix, that will generate an error. 3271 3272 The i and j indices are 0 based 3273 3274 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3275 3276 @*/ 3277 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3278 { 3279 PetscErrorCode ierr; 3280 PetscInt ii; 3281 Mat_SeqBAIJ *baij; 3282 3283 PetscFunctionBegin; 3284 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3285 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3286 3287 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3288 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3289 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3290 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3291 baij = (Mat_SeqBAIJ*)(*mat)->data; 3292 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3293 3294 baij->i = i; 3295 baij->j = j; 3296 baij->a = a; 3297 baij->singlemalloc = PETSC_FALSE; 3298 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3299 baij->free_a = PETSC_FALSE; 3300 baij->free_ij = PETSC_FALSE; 3301 3302 for (ii=0; ii<m; ii++) { 3303 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3304 #if defined(PETSC_USE_DEBUG) 3305 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3306 #endif 3307 } 3308 #if defined(PETSC_USE_DEBUG) 3309 for (ii=0; ii<baij->i[m]; ii++) { 3310 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3311 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3312 } 3313 #endif 3314 3315 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3316 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319