1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "petscblaslapack.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@ 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr,(*f)(Mat); 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 29 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 32 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 33 if (f) { 34 ierr = (*f)(mat);CHKERRQ(ierr); 35 } else { 36 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 EXTERN_C_BEGIN 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 45 { 46 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 47 PetscErrorCode ierr; 48 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 49 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 50 PetscReal shift = 0.0; 51 52 PetscFunctionBegin; 53 if (a->idiagvalid) PetscFunctionReturn(0); 54 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 55 diag_offset = a->diag; 56 if (!a->idiag) { 57 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 58 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs2*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 case 7: 129 for (i=0; i<mbs; i++) { 130 odiag = v + 49*diag_offset[i]; 131 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 132 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 133 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 134 diag += 49; 135 mdiag += 49; 136 } 137 break; 138 default: 139 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 140 for (i=0; i<mbs; i++) { 141 odiag = v + bs2*diag_offset[i]; 142 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 143 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 144 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 145 diag += bs2; 146 mdiag += bs2; 147 } 148 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 149 } 150 a->idiagvalid = PETSC_TRUE; 151 PetscFunctionReturn(0); 152 } 153 EXTERN_C_END 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 157 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 158 { 159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 160 PetscScalar *x,x1,s1; 161 const PetscScalar *b; 162 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 163 PetscErrorCode ierr; 164 PetscInt m = a->mbs,i,i2,nz,j; 165 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 166 167 PetscFunctionBegin; 168 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 169 its = its*lits; 170 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 171 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 172 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 173 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 174 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 175 176 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 177 178 diag = a->diag; 179 idiag = a->idiag; 180 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 181 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 182 183 if (flag & SOR_ZERO_INITIAL_GUESS) { 184 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 185 x[0] = b[0]*idiag[0]; 186 i2 = 1; 187 idiag += 1; 188 for (i=1; i<m; i++) { 189 v = aa + ai[i]; 190 vi = aj + ai[i]; 191 nz = diag[i] - ai[i]; 192 s1 = b[i2]; 193 for (j=0; j<nz; j++) { 194 s1 -= v[j]*x[vi[j]]; 195 } 196 x[i2] = idiag[0]*s1; 197 idiag += 1; 198 i2 += 1; 199 } 200 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 201 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 202 } 203 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 204 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 205 i2 = 0; 206 mdiag = a->idiag+a->mbs; 207 for (i=0; i<m; i++) { 208 x1 = x[i2]; 209 x[i2] = mdiag[0]*x1; 210 mdiag += 1; 211 i2 += 1; 212 } 213 ierr = PetscLogFlops(m);CHKERRQ(ierr); 214 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 215 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 216 } 217 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 218 idiag = a->idiag+a->mbs - 1; 219 i2 = m - 1; 220 x1 = x[i2]; 221 x[i2] = idiag[0]*x1; 222 idiag -= 1; 223 i2 -= 1; 224 for (i=m-2; i>=0; i--) { 225 v = aa + (diag[i]+1); 226 vi = aj + diag[i] + 1; 227 nz = ai[i+1] - diag[i] - 1; 228 s1 = x[i2]; 229 for (j=0; j<nz; j++) { 230 s1 -= v[j]*x[vi[j]]; 231 } 232 x[i2] = idiag[0]*s1; 233 idiag -= 1; 234 i2 -= 1; 235 } 236 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 237 } 238 } else { 239 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 240 } 241 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 248 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 251 PetscScalar *x,x1,x2,s1,s2; 252 const PetscScalar *b; 253 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 254 PetscErrorCode ierr; 255 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 256 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 257 258 PetscFunctionBegin; 259 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 260 its = its*lits; 261 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 262 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 263 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 264 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 265 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 266 267 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 268 269 diag = a->diag; 270 idiag = a->idiag; 271 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 272 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 273 274 if (flag & SOR_ZERO_INITIAL_GUESS) { 275 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 276 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 277 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 278 i2 = 2; 279 idiag += 4; 280 for (i=1; i<m; i++) { 281 v = aa + 4*ai[i]; 282 vi = aj + ai[i]; 283 nz = diag[i] - ai[i]; 284 s1 = b[i2]; s2 = b[i2+1]; 285 for (j=0; j<nz; j++) { 286 idx = 2*vi[j]; 287 it = 4*j; 288 x1 = x[idx]; x2 = x[1+idx]; 289 s1 -= v[it]*x1 + v[it+2]*x2; 290 s2 -= v[it+1]*x1 + v[it+3]*x2; 291 } 292 x[i2] = idiag[0]*s1 + idiag[2]*s2; 293 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 294 idiag += 4; 295 i2 += 2; 296 } 297 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 298 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 299 } 300 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 301 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 302 i2 = 0; 303 mdiag = a->idiag+4*a->mbs; 304 for (i=0; i<m; i++) { 305 x1 = x[i2]; x2 = x[i2+1]; 306 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 307 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 308 mdiag += 4; 309 i2 += 2; 310 } 311 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 312 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 313 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 314 } 315 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 316 idiag = a->idiag+4*a->mbs - 4; 317 i2 = 2*m - 2; 318 x1 = x[i2]; x2 = x[i2+1]; 319 x[i2] = idiag[0]*x1 + idiag[2]*x2; 320 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 321 idiag -= 4; 322 i2 -= 2; 323 for (i=m-2; i>=0; i--) { 324 v = aa + 4*(diag[i]+1); 325 vi = aj + diag[i] + 1; 326 nz = ai[i+1] - diag[i] - 1; 327 s1 = x[i2]; s2 = x[i2+1]; 328 for (j=0; j<nz; j++) { 329 idx = 2*vi[j]; 330 it = 4*j; 331 x1 = x[idx]; x2 = x[1+idx]; 332 s1 -= v[it]*x1 + v[it+2]*x2; 333 s2 -= v[it+1]*x1 + v[it+3]*x2; 334 } 335 x[i2] = idiag[0]*s1 + idiag[2]*s2; 336 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 337 idiag -= 4; 338 i2 -= 2; 339 } 340 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 341 } 342 } else { 343 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 344 } 345 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 346 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 352 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 353 { 354 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 355 PetscScalar *x,x1,x2,x3,s1,s2,s3; 356 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 357 const PetscScalar *b; 358 PetscErrorCode ierr; 359 PetscInt m = a->mbs,i,i2,nz,idx; 360 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 361 362 PetscFunctionBegin; 363 its = its*lits; 364 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 365 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 366 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 367 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 368 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 369 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 370 371 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 372 373 diag = a->diag; 374 idiag = a->idiag; 375 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 376 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 377 378 if (flag & SOR_ZERO_INITIAL_GUESS) { 379 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 380 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 381 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 382 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 383 i2 = 3; 384 idiag += 9; 385 for (i=1; i<m; i++) { 386 v = aa + 9*ai[i]; 387 vi = aj + ai[i]; 388 nz = diag[i] - ai[i]; 389 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 390 while (nz--) { 391 idx = 3*(*vi++); 392 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 393 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 394 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 395 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 396 v += 9; 397 } 398 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 399 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 400 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 401 idiag += 9; 402 i2 += 3; 403 } 404 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 405 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 406 } 407 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 408 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 409 i2 = 0; 410 mdiag = a->idiag+9*a->mbs; 411 for (i=0; i<m; i++) { 412 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 413 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 414 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 415 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 416 mdiag += 9; 417 i2 += 3; 418 } 419 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 420 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 421 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 422 } 423 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 424 idiag = a->idiag+9*a->mbs - 9; 425 i2 = 3*m - 3; 426 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 427 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 428 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 429 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 430 idiag -= 9; 431 i2 -= 3; 432 for (i=m-2; i>=0; i--) { 433 v = aa + 9*(diag[i]+1); 434 vi = aj + diag[i] + 1; 435 nz = ai[i+1] - diag[i] - 1; 436 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 437 while (nz--) { 438 idx = 3*(*vi++); 439 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 440 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 441 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 442 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 443 v += 9; 444 } 445 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 446 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 447 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 448 idiag -= 9; 449 i2 -= 3; 450 } 451 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 452 } 453 } else { 454 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 455 } 456 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 457 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 463 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 464 { 465 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 466 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 467 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 468 const PetscScalar *b; 469 PetscErrorCode ierr; 470 PetscInt m = a->mbs,i,i2,nz,idx; 471 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 472 473 PetscFunctionBegin; 474 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 475 its = its*lits; 476 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 477 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 478 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 479 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 480 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 481 482 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 483 484 diag = a->diag; 485 idiag = a->idiag; 486 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 487 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 488 489 if (flag & SOR_ZERO_INITIAL_GUESS) { 490 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 491 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 492 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 493 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 494 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 495 i2 = 4; 496 idiag += 16; 497 for (i=1; i<m; i++) { 498 v = aa + 16*ai[i]; 499 vi = aj + ai[i]; 500 nz = diag[i] - ai[i]; 501 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 502 while (nz--) { 503 idx = 4*(*vi++); 504 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 505 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 506 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 507 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 508 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 509 v += 16; 510 } 511 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 512 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 513 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 514 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 515 idiag += 16; 516 i2 += 4; 517 } 518 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 519 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 520 } 521 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 522 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 523 i2 = 0; 524 mdiag = a->idiag+16*a->mbs; 525 for (i=0; i<m; i++) { 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 527 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 528 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 529 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 530 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 531 mdiag += 16; 532 i2 += 4; 533 } 534 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 535 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 536 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 537 } 538 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 539 idiag = a->idiag+16*a->mbs - 16; 540 i2 = 4*m - 4; 541 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 542 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 543 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 544 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 545 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 546 idiag -= 16; 547 i2 -= 4; 548 for (i=m-2; i>=0; i--) { 549 v = aa + 16*(diag[i]+1); 550 vi = aj + diag[i] + 1; 551 nz = ai[i+1] - diag[i] - 1; 552 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 553 while (nz--) { 554 idx = 4*(*vi++); 555 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 556 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 557 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 558 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 559 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 560 v += 16; 561 } 562 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 563 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 564 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 565 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 566 idiag -= 16; 567 i2 -= 4; 568 } 569 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 570 } 571 } else { 572 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 573 } 574 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 581 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 582 { 583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 584 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 585 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 586 const PetscScalar *b; 587 PetscErrorCode ierr; 588 PetscInt m = a->mbs,i,i2,nz,idx; 589 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 590 591 PetscFunctionBegin; 592 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 593 its = its*lits; 594 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 595 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 596 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 597 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 598 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 599 600 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 601 602 diag = a->diag; 603 idiag = a->idiag; 604 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 605 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 606 607 if (flag & SOR_ZERO_INITIAL_GUESS) { 608 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 609 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 610 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 611 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 612 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 613 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 614 i2 = 5; 615 idiag += 25; 616 for (i=1; i<m; i++) { 617 v = aa + 25*ai[i]; 618 vi = aj + ai[i]; 619 nz = diag[i] - ai[i]; 620 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 621 while (nz--) { 622 idx = 5*(*vi++); 623 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 624 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 625 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 626 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 627 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 628 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 629 v += 25; 630 } 631 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 632 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 633 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 634 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 635 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 636 idiag += 25; 637 i2 += 5; 638 } 639 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 640 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 641 } 642 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 643 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 644 i2 = 0; 645 mdiag = a->idiag+25*a->mbs; 646 for (i=0; i<m; i++) { 647 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 648 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 649 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 650 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 651 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 652 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 653 mdiag += 25; 654 i2 += 5; 655 } 656 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 657 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 658 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 659 } 660 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 661 idiag = a->idiag+25*a->mbs - 25; 662 i2 = 5*m - 5; 663 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 664 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 665 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 666 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 667 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 668 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 669 idiag -= 25; 670 i2 -= 5; 671 for (i=m-2; i>=0; i--) { 672 v = aa + 25*(diag[i]+1); 673 vi = aj + diag[i] + 1; 674 nz = ai[i+1] - diag[i] - 1; 675 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 676 while (nz--) { 677 idx = 5*(*vi++); 678 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 679 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 680 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 681 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 682 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 683 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 684 v += 25; 685 } 686 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 687 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 688 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 689 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 690 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 691 idiag -= 25; 692 i2 -= 5; 693 } 694 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 695 } 696 } else { 697 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 698 } 699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 700 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 706 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 707 { 708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 709 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 710 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 711 const PetscScalar *b; 712 PetscErrorCode ierr; 713 PetscInt m = a->mbs,i,i2,nz,idx; 714 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 715 716 PetscFunctionBegin; 717 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 718 its = its*lits; 719 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 720 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 721 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 722 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 723 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 724 725 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 726 727 diag = a->diag; 728 idiag = a->idiag; 729 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 730 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 731 732 if (flag & SOR_ZERO_INITIAL_GUESS) { 733 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 734 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 735 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 736 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 737 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 738 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 739 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 740 i2 = 6; 741 idiag += 36; 742 for (i=1; i<m; i++) { 743 v = aa + 36*ai[i]; 744 vi = aj + ai[i]; 745 nz = diag[i] - ai[i]; 746 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 747 while (nz--) { 748 idx = 6*(*vi++); 749 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 750 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 751 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 752 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 753 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 754 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 755 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 756 v += 36; 757 } 758 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 759 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 760 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 761 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 762 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 763 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 764 idiag += 36; 765 i2 += 6; 766 } 767 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 768 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 769 } 770 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 771 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 772 i2 = 0; 773 mdiag = a->idiag+36*a->mbs; 774 for (i=0; i<m; i++) { 775 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 776 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 777 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 778 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 779 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 780 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 781 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 782 mdiag += 36; 783 i2 += 6; 784 } 785 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 786 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 787 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 788 } 789 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 790 idiag = a->idiag+36*a->mbs - 36; 791 i2 = 6*m - 6; 792 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 793 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 794 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 795 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 796 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 797 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 798 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 799 idiag -= 36; 800 i2 -= 6; 801 for (i=m-2; i>=0; i--) { 802 v = aa + 36*(diag[i]+1); 803 vi = aj + diag[i] + 1; 804 nz = ai[i+1] - diag[i] - 1; 805 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 806 while (nz--) { 807 idx = 6*(*vi++); 808 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 809 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 810 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 811 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 812 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 813 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 814 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 815 v += 36; 816 } 817 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 818 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 819 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 820 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 821 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 822 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 823 idiag -= 36; 824 i2 -= 6; 825 } 826 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 827 } 828 } else { 829 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 830 } 831 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 832 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 838 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 839 { 840 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 841 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 842 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 843 const PetscScalar *b; 844 PetscErrorCode ierr; 845 PetscInt m = a->mbs,i,i2,nz,idx; 846 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 847 848 PetscFunctionBegin; 849 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 850 its = its*lits; 851 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 852 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 853 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 854 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 855 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 856 857 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 858 859 diag = a->diag; 860 idiag = a->idiag; 861 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 862 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 863 864 if (flag & SOR_ZERO_INITIAL_GUESS) { 865 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 866 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 867 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 868 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 869 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 870 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 871 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 872 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 873 i2 = 7; 874 idiag += 49; 875 for (i=1; i<m; i++) { 876 v = aa + 49*ai[i]; 877 vi = aj + ai[i]; 878 nz = diag[i] - ai[i]; 879 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 880 while (nz--) { 881 idx = 7*(*vi++); 882 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 883 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 884 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 885 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 886 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 887 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 888 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 889 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 890 v += 49; 891 } 892 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 893 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 894 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 895 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 896 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 897 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 898 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 899 idiag += 49; 900 i2 += 7; 901 } 902 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 903 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 904 } 905 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 906 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 907 i2 = 0; 908 mdiag = a->idiag+49*a->mbs; 909 for (i=0; i<m; i++) { 910 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 911 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 912 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 913 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 914 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 915 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 916 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 917 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 918 mdiag += 49; 919 i2 += 7; 920 } 921 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 922 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 923 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 924 } 925 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 926 idiag = a->idiag+49*a->mbs - 49; 927 i2 = 7*m - 7; 928 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 929 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 930 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 931 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 932 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 933 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 934 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 935 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 936 idiag -= 49; 937 i2 -= 7; 938 for (i=m-2; i>=0; i--) { 939 v = aa + 49*(diag[i]+1); 940 vi = aj + diag[i] + 1; 941 nz = ai[i+1] - diag[i] - 1; 942 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 943 while (nz--) { 944 idx = 7*(*vi++); 945 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 946 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 947 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 948 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 949 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 950 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 951 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 952 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 953 v += 49; 954 } 955 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 956 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 957 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 958 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 959 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 960 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 961 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 962 idiag -= 49; 963 i2 -= 7; 964 } 965 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 966 } 967 } else { 968 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 969 } 970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 971 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 977 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 978 { 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 980 PetscScalar *x,*work,*w,*workt; 981 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 982 const PetscScalar *b; 983 PetscErrorCode ierr; 984 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 985 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 986 987 PetscFunctionBegin; 988 its = its*lits; 989 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 990 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 991 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 992 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 993 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 994 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 995 996 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 997 998 diag = a->diag; 999 idiag = a->idiag; 1000 if (!a->mult_work) { 1001 k = PetscMax(A->rmap->n,A->cmap->n); 1002 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1003 } 1004 work = a->mult_work; 1005 if (!a->sor_work) { 1006 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1007 } 1008 w = a->sor_work; 1009 1010 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1012 1013 if (flag & SOR_ZERO_INITIAL_GUESS) { 1014 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1015 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1016 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1017 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1018 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1019 i2 = bs; 1020 idiag += bs2; 1021 for (i=1; i<m; i++) { 1022 v = aa + bs2*ai[i]; 1023 vi = aj + ai[i]; 1024 nz = diag[i] - ai[i]; 1025 1026 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1027 /* copy all rows of x that are needed into contiguous space */ 1028 workt = work; 1029 for (j=0; j<nz; j++) { 1030 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1031 workt += bs; 1032 } 1033 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1034 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1035 while (nz--) { 1036 idx = N*(*vi++); 1037 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1038 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1039 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1040 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1041 v += N2; 1042 } */ 1043 1044 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1045 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1046 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1047 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1048 1049 idiag += bs2; 1050 i2 += bs; 1051 } 1052 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1053 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1054 } 1055 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1056 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1057 i2 = 0; 1058 mdiag = a->idiag+bs2*a->mbs; 1059 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1060 for (i=0; i<m; i++) { 1061 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1062 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1063 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1064 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1065 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1066 1067 mdiag += bs2; 1068 i2 += bs; 1069 } 1070 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1071 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1072 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1073 } 1074 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1075 idiag = a->idiag+bs2*a->mbs - bs2; 1076 i2 = bs*m - bs; 1077 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1078 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1079 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1080 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1081 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1082 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1083 idiag -= bs2; 1084 i2 -= bs; 1085 for (i=m-2; i>=0; i--) { 1086 v = aa + bs2*(diag[i]+1); 1087 vi = aj + diag[i] + 1; 1088 nz = ai[i+1] - diag[i] - 1; 1089 1090 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1091 /* copy all rows of x that are needed into contiguous space */ 1092 workt = work; 1093 for (j=0; j<nz; j++) { 1094 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1095 workt += bs; 1096 } 1097 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1098 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1099 while (nz--) { 1100 idx = N*(*vi++); 1101 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1102 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1103 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1104 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1105 v += N2; 1106 } */ 1107 1108 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1109 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1110 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1111 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1112 idiag -= bs2; 1113 i2 -= bs; 1114 } 1115 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1116 } 1117 } else { 1118 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1119 } 1120 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1121 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 /* 1126 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1127 */ 1128 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1129 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1130 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1131 #define matsetvaluesblocked4_ matsetvaluesblocked4 1132 #endif 1133 1134 EXTERN_C_BEGIN 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "matsetvaluesblocked4_" 1137 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1138 { 1139 Mat A = *AA; 1140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1141 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1142 PetscInt *ai=a->i,*ailen=a->ilen; 1143 PetscInt *aj=a->j,stepval,lastcol = -1; 1144 const PetscScalar *value = v; 1145 MatScalar *ap,*aa = a->a,*bap; 1146 1147 PetscFunctionBegin; 1148 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1149 stepval = (n-1)*4; 1150 for (k=0; k<m; k++) { /* loop over added rows */ 1151 row = im[k]; 1152 rp = aj + ai[row]; 1153 ap = aa + 16*ai[row]; 1154 nrow = ailen[row]; 1155 low = 0; 1156 high = nrow; 1157 for (l=0; l<n; l++) { /* loop over added columns */ 1158 col = in[l]; 1159 if (col <= lastcol) low = 0; else high = nrow; 1160 lastcol = col; 1161 value = v + k*(stepval+4 + l)*4; 1162 while (high-low > 7) { 1163 t = (low+high)/2; 1164 if (rp[t] > col) high = t; 1165 else low = t; 1166 } 1167 for (i=low; i<high; i++) { 1168 if (rp[i] > col) break; 1169 if (rp[i] == col) { 1170 bap = ap + 16*i; 1171 for (ii=0; ii<4; ii++,value+=stepval) { 1172 for (jj=ii; jj<16; jj+=4) { 1173 bap[jj] += *value++; 1174 } 1175 } 1176 goto noinsert2; 1177 } 1178 } 1179 N = nrow++ - 1; 1180 high++; /* added new column index thus must search to one higher than before */ 1181 /* shift up all the later entries in this row */ 1182 for (ii=N; ii>=i; ii--) { 1183 rp[ii+1] = rp[ii]; 1184 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1185 } 1186 if (N >= i) { 1187 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1188 } 1189 rp[i] = col; 1190 bap = ap + 16*i; 1191 for (ii=0; ii<4; ii++,value+=stepval) { 1192 for (jj=ii; jj<16; jj+=4) { 1193 bap[jj] = *value++; 1194 } 1195 } 1196 noinsert2:; 1197 low = i; 1198 } 1199 ailen[row] = nrow; 1200 } 1201 PetscFunctionReturnVoid(); 1202 } 1203 EXTERN_C_END 1204 1205 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1206 #define matsetvalues4_ MATSETVALUES4 1207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1208 #define matsetvalues4_ matsetvalues4 1209 #endif 1210 1211 EXTERN_C_BEGIN 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatSetValues4_" 1214 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1215 { 1216 Mat A = *AA; 1217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1218 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1219 PetscInt *ai=a->i,*ailen=a->ilen; 1220 PetscInt *aj=a->j,brow,bcol; 1221 PetscInt ridx,cidx,lastcol = -1; 1222 MatScalar *ap,value,*aa=a->a,*bap; 1223 1224 PetscFunctionBegin; 1225 for (k=0; k<m; k++) { /* loop over added rows */ 1226 row = im[k]; brow = row/4; 1227 rp = aj + ai[brow]; 1228 ap = aa + 16*ai[brow]; 1229 nrow = ailen[brow]; 1230 low = 0; 1231 high = nrow; 1232 for (l=0; l<n; l++) { /* loop over added columns */ 1233 col = in[l]; bcol = col/4; 1234 ridx = row % 4; cidx = col % 4; 1235 value = v[l + k*n]; 1236 if (col <= lastcol) low = 0; else high = nrow; 1237 lastcol = col; 1238 while (high-low > 7) { 1239 t = (low+high)/2; 1240 if (rp[t] > bcol) high = t; 1241 else low = t; 1242 } 1243 for (i=low; i<high; i++) { 1244 if (rp[i] > bcol) break; 1245 if (rp[i] == bcol) { 1246 bap = ap + 16*i + 4*cidx + ridx; 1247 *bap += value; 1248 goto noinsert1; 1249 } 1250 } 1251 N = nrow++ - 1; 1252 high++; /* added new column thus must search to one higher than before */ 1253 /* shift up all the later entries in this row */ 1254 for (ii=N; ii>=i; ii--) { 1255 rp[ii+1] = rp[ii]; 1256 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1257 } 1258 if (N>=i) { 1259 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1260 } 1261 rp[i] = bcol; 1262 ap[16*i + 4*cidx + ridx] = value; 1263 noinsert1:; 1264 low = i; 1265 } 1266 ailen[brow] = nrow; 1267 } 1268 PetscFunctionReturnVoid(); 1269 } 1270 EXTERN_C_END 1271 1272 /* 1273 Checks for missing diagonals 1274 */ 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1277 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1278 { 1279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1280 PetscErrorCode ierr; 1281 PetscInt *diag,*jj = a->j,i; 1282 1283 PetscFunctionBegin; 1284 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1285 *missing = PETSC_FALSE; 1286 if (A->rmap->n > 0 && !jj) { 1287 *missing = PETSC_TRUE; 1288 if (d) *d = 0; 1289 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1290 } else { 1291 diag = a->diag; 1292 for (i=0; i<a->mbs; i++) { 1293 if (jj[diag[i]] != i) { 1294 *missing = PETSC_TRUE; 1295 if (d) *d = i; 1296 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1297 } 1298 } 1299 } 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1305 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1306 { 1307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1308 PetscErrorCode ierr; 1309 PetscInt i,j,m = a->mbs; 1310 1311 PetscFunctionBegin; 1312 if (!a->diag) { 1313 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1314 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1315 a->free_diag = PETSC_TRUE; 1316 } 1317 for (i=0; i<m; i++) { 1318 a->diag[i] = a->i[i+1]; 1319 for (j=a->i[i]; j<a->i[i+1]; j++) { 1320 if (a->j[j] == i) { 1321 a->diag[i] = j; 1322 break; 1323 } 1324 } 1325 } 1326 PetscFunctionReturn(0); 1327 } 1328 1329 1330 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1334 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1335 { 1336 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1337 PetscErrorCode ierr; 1338 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1339 PetscInt *tia, *tja; 1340 1341 PetscFunctionBegin; 1342 *nn = n; 1343 if (!ia) PetscFunctionReturn(0); 1344 if (symmetric) { 1345 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1346 nz = tia[n]; 1347 } else { 1348 tia = a->i; tja = a->j; 1349 } 1350 1351 if (!blockcompressed && bs > 1) { 1352 (*nn) *= bs; 1353 /* malloc & create the natural set of indices */ 1354 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1355 if (n) { 1356 (*ia)[0] = 0; 1357 for (j=1; j<bs; j++) { 1358 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1359 } 1360 } 1361 1362 for (i=1; i<n; i++) { 1363 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1364 for (j=1; j<bs; j++) { 1365 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1366 } 1367 } 1368 if (n) { 1369 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1370 } 1371 1372 if (ja) { 1373 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1374 cnt = 0; 1375 for (i=0; i<n; i++) { 1376 for (j=0; j<bs; j++) { 1377 for (k=tia[i]; k<tia[i+1]; k++) { 1378 for (l=0; l<bs; l++) { 1379 (*ja)[cnt++] = bs*tja[k] + l; 1380 } 1381 } 1382 } 1383 } 1384 } 1385 1386 n *= bs; 1387 nz *= bs*bs; 1388 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1389 ierr = PetscFree(tia);CHKERRQ(ierr); 1390 ierr = PetscFree(tja);CHKERRQ(ierr); 1391 } 1392 } else if (oshift == 1) { 1393 if (symmetric) { 1394 PetscInt nz = tia[A->rmap->n/bs]; 1395 /* add 1 to i and j indices */ 1396 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1397 *ia = tia; 1398 if (ja) { 1399 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1400 *ja = tja; 1401 } 1402 } else { 1403 PetscInt nz = a->i[A->rmap->n/bs]; 1404 /* malloc space and add 1 to i and j indices */ 1405 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1406 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1407 if (ja) { 1408 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1409 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1410 } 1411 } 1412 } else { 1413 *ia = tia; 1414 if (ja) *ja = tja; 1415 } 1416 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1422 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1423 { 1424 PetscErrorCode ierr; 1425 1426 PetscFunctionBegin; 1427 if (!ia) PetscFunctionReturn(0); 1428 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1429 ierr = PetscFree(*ia);CHKERRQ(ierr); 1430 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1431 } 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1437 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1438 { 1439 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1440 PetscErrorCode ierr; 1441 1442 PetscFunctionBegin; 1443 #if defined(PETSC_USE_LOG) 1444 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1445 #endif 1446 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1447 if (a->row) { 1448 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1449 } 1450 if (a->col) { 1451 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1452 } 1453 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1454 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1455 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1456 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1457 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1458 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1459 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1460 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1461 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1462 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1463 1464 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1465 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1466 ierr = PetscFree(a);CHKERRQ(ierr); 1467 1468 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1469 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1470 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1471 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1472 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1473 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1474 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1475 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1476 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1482 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1483 { 1484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 1487 PetscFunctionBegin; 1488 switch (op) { 1489 case MAT_ROW_ORIENTED: 1490 a->roworiented = flg; 1491 break; 1492 case MAT_KEEP_NONZERO_PATTERN: 1493 a->keepnonzeropattern = flg; 1494 break; 1495 case MAT_NEW_NONZERO_LOCATIONS: 1496 a->nonew = (flg ? 0 : 1); 1497 break; 1498 case MAT_NEW_NONZERO_LOCATION_ERR: 1499 a->nonew = (flg ? -1 : 0); 1500 break; 1501 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1502 a->nonew = (flg ? -2 : 0); 1503 break; 1504 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1505 a->nounused = (flg ? -1 : 0); 1506 break; 1507 case MAT_NEW_DIAGONALS: 1508 case MAT_IGNORE_OFF_PROC_ENTRIES: 1509 case MAT_USE_HASH_TABLE: 1510 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1511 break; 1512 case MAT_SYMMETRIC: 1513 case MAT_STRUCTURALLY_SYMMETRIC: 1514 case MAT_HERMITIAN: 1515 case MAT_SYMMETRY_ETERNAL: 1516 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1517 break; 1518 default: 1519 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1526 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1527 { 1528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1529 PetscErrorCode ierr; 1530 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1531 MatScalar *aa,*aa_i; 1532 PetscScalar *v_i; 1533 1534 PetscFunctionBegin; 1535 bs = A->rmap->bs; 1536 ai = a->i; 1537 aj = a->j; 1538 aa = a->a; 1539 bs2 = a->bs2; 1540 1541 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1542 1543 bn = row/bs; /* Block number */ 1544 bp = row % bs; /* Block Position */ 1545 M = ai[bn+1] - ai[bn]; 1546 *nz = bs*M; 1547 1548 if (v) { 1549 *v = 0; 1550 if (*nz) { 1551 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1552 for (i=0; i<M; i++) { /* for each block in the block row */ 1553 v_i = *v + i*bs; 1554 aa_i = aa + bs2*(ai[bn] + i); 1555 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1556 } 1557 } 1558 } 1559 1560 if (idx) { 1561 *idx = 0; 1562 if (*nz) { 1563 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1564 for (i=0; i<M; i++) { /* for each block in the block row */ 1565 idx_i = *idx + i*bs; 1566 itmp = bs*aj[ai[bn] + i]; 1567 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1568 } 1569 } 1570 } 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1576 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1577 { 1578 PetscErrorCode ierr; 1579 1580 PetscFunctionBegin; 1581 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1582 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1583 PetscFunctionReturn(0); 1584 } 1585 1586 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1590 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1591 { 1592 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1593 Mat C; 1594 PetscErrorCode ierr; 1595 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1596 PetscInt *rows,*cols,bs2=a->bs2; 1597 MatScalar *array; 1598 1599 PetscFunctionBegin; 1600 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1601 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1602 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1603 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1604 1605 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1606 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1607 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1608 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1609 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1610 ierr = PetscFree(col);CHKERRQ(ierr); 1611 } else { 1612 C = *B; 1613 } 1614 1615 array = a->a; 1616 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1617 for (i=0; i<mbs; i++) { 1618 cols[0] = i*bs; 1619 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1620 len = ai[i+1] - ai[i]; 1621 for (j=0; j<len; j++) { 1622 rows[0] = (*aj++)*bs; 1623 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1624 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1625 array += bs2; 1626 } 1627 } 1628 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1629 1630 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1631 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 1633 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1634 *B = C; 1635 } else { 1636 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1643 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1644 { 1645 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1646 PetscErrorCode ierr; 1647 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1648 int fd; 1649 PetscScalar *aa; 1650 FILE *file; 1651 1652 PetscFunctionBegin; 1653 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1654 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1655 col_lens[0] = MAT_FILE_CLASSID; 1656 1657 col_lens[1] = A->rmap->N; 1658 col_lens[2] = A->cmap->n; 1659 col_lens[3] = a->nz*bs2; 1660 1661 /* store lengths of each row and write (including header) to file */ 1662 count = 0; 1663 for (i=0; i<a->mbs; i++) { 1664 for (j=0; j<bs; j++) { 1665 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1666 } 1667 } 1668 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1669 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1670 1671 /* store column indices (zero start index) */ 1672 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1673 count = 0; 1674 for (i=0; i<a->mbs; i++) { 1675 for (j=0; j<bs; j++) { 1676 for (k=a->i[i]; k<a->i[i+1]; k++) { 1677 for (l=0; l<bs; l++) { 1678 jj[count++] = bs*a->j[k] + l; 1679 } 1680 } 1681 } 1682 } 1683 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1684 ierr = PetscFree(jj);CHKERRQ(ierr); 1685 1686 /* store nonzero values */ 1687 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1688 count = 0; 1689 for (i=0; i<a->mbs; i++) { 1690 for (j=0; j<bs; j++) { 1691 for (k=a->i[i]; k<a->i[i+1]; k++) { 1692 for (l=0; l<bs; l++) { 1693 aa[count++] = a->a[bs2*k + l*bs + j]; 1694 } 1695 } 1696 } 1697 } 1698 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1699 ierr = PetscFree(aa);CHKERRQ(ierr); 1700 1701 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1702 if (file) { 1703 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1710 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1711 { 1712 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1713 PetscErrorCode ierr; 1714 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1715 PetscViewerFormat format; 1716 1717 PetscFunctionBegin; 1718 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1719 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1720 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1721 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1722 Mat aij; 1723 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1724 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1725 ierr = MatDestroy(aij);CHKERRQ(ierr); 1726 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1727 PetscFunctionReturn(0); 1728 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1729 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1730 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1731 for (i=0; i<a->mbs; i++) { 1732 for (j=0; j<bs; j++) { 1733 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1734 for (k=a->i[i]; k<a->i[i+1]; k++) { 1735 for (l=0; l<bs; l++) { 1736 #if defined(PETSC_USE_COMPLEX) 1737 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1738 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1739 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1740 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1741 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1742 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1743 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1744 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1745 } 1746 #else 1747 if (a->a[bs2*k + l*bs + j] != 0.0) { 1748 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1749 } 1750 #endif 1751 } 1752 } 1753 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1754 } 1755 } 1756 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1757 } else { 1758 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1759 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1760 for (i=0; i<a->mbs; i++) { 1761 for (j=0; j<bs; j++) { 1762 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1763 for (k=a->i[i]; k<a->i[i+1]; k++) { 1764 for (l=0; l<bs; l++) { 1765 #if defined(PETSC_USE_COMPLEX) 1766 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1767 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1768 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1769 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1770 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1771 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1772 } else { 1773 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1774 } 1775 #else 1776 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1777 #endif 1778 } 1779 } 1780 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1781 } 1782 } 1783 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1784 } 1785 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1791 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1792 { 1793 Mat A = (Mat) Aa; 1794 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1797 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1798 MatScalar *aa; 1799 PetscViewer viewer; 1800 1801 PetscFunctionBegin; 1802 1803 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1804 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1805 1806 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1807 1808 /* loop over matrix elements drawing boxes */ 1809 color = PETSC_DRAW_BLUE; 1810 for (i=0,row=0; i<mbs; i++,row+=bs) { 1811 for (j=a->i[i]; j<a->i[i+1]; j++) { 1812 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1813 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1814 aa = a->a + j*bs2; 1815 for (k=0; k<bs; k++) { 1816 for (l=0; l<bs; l++) { 1817 if (PetscRealPart(*aa++) >= 0.) continue; 1818 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1819 } 1820 } 1821 } 1822 } 1823 color = PETSC_DRAW_CYAN; 1824 for (i=0,row=0; i<mbs; i++,row+=bs) { 1825 for (j=a->i[i]; j<a->i[i+1]; j++) { 1826 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1827 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1828 aa = a->a + j*bs2; 1829 for (k=0; k<bs; k++) { 1830 for (l=0; l<bs; l++) { 1831 if (PetscRealPart(*aa++) != 0.) continue; 1832 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1833 } 1834 } 1835 } 1836 } 1837 1838 color = PETSC_DRAW_RED; 1839 for (i=0,row=0; i<mbs; i++,row+=bs) { 1840 for (j=a->i[i]; j<a->i[i+1]; j++) { 1841 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1842 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1843 aa = a->a + j*bs2; 1844 for (k=0; k<bs; k++) { 1845 for (l=0; l<bs; l++) { 1846 if (PetscRealPart(*aa++) <= 0.) continue; 1847 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1848 } 1849 } 1850 } 1851 } 1852 PetscFunctionReturn(0); 1853 } 1854 1855 #undef __FUNCT__ 1856 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1857 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1858 { 1859 PetscErrorCode ierr; 1860 PetscReal xl,yl,xr,yr,w,h; 1861 PetscDraw draw; 1862 PetscBool isnull; 1863 1864 PetscFunctionBegin; 1865 1866 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1867 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1868 1869 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1870 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1871 xr += w; yr += h; xl = -w; yl = -h; 1872 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1873 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1874 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatView_SeqBAIJ" 1880 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1881 { 1882 PetscErrorCode ierr; 1883 PetscBool iascii,isbinary,isdraw; 1884 1885 PetscFunctionBegin; 1886 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1887 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1888 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1889 if (iascii){ 1890 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1891 } else if (isbinary) { 1892 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1893 } else if (isdraw) { 1894 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1895 } else { 1896 Mat B; 1897 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1898 ierr = MatView(B,viewer);CHKERRQ(ierr); 1899 ierr = MatDestroy(B);CHKERRQ(ierr); 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1907 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1908 { 1909 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1910 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1911 PetscInt *ai = a->i,*ailen = a->ilen; 1912 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1913 MatScalar *ap,*aa = a->a; 1914 1915 PetscFunctionBegin; 1916 for (k=0; k<m; k++) { /* loop over rows */ 1917 row = im[k]; brow = row/bs; 1918 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1919 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1920 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1921 nrow = ailen[brow]; 1922 for (l=0; l<n; l++) { /* loop over columns */ 1923 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1924 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1925 col = in[l] ; 1926 bcol = col/bs; 1927 cidx = col%bs; 1928 ridx = row%bs; 1929 high = nrow; 1930 low = 0; /* assume unsorted */ 1931 while (high-low > 5) { 1932 t = (low+high)/2; 1933 if (rp[t] > bcol) high = t; 1934 else low = t; 1935 } 1936 for (i=low; i<high; i++) { 1937 if (rp[i] > bcol) break; 1938 if (rp[i] == bcol) { 1939 *v++ = ap[bs2*i+bs*cidx+ridx]; 1940 goto finished; 1941 } 1942 } 1943 *v++ = 0.0; 1944 finished:; 1945 } 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1952 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1953 { 1954 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1955 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1956 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1957 PetscErrorCode ierr; 1958 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1959 PetscBool roworiented=a->roworiented; 1960 const PetscScalar *value = v; 1961 MatScalar *ap,*aa = a->a,*bap; 1962 1963 PetscFunctionBegin; 1964 if (roworiented) { 1965 stepval = (n-1)*bs; 1966 } else { 1967 stepval = (m-1)*bs; 1968 } 1969 for (k=0; k<m; k++) { /* loop over added rows */ 1970 row = im[k]; 1971 if (row < 0) continue; 1972 #if defined(PETSC_USE_DEBUG) 1973 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1974 #endif 1975 rp = aj + ai[row]; 1976 ap = aa + bs2*ai[row]; 1977 rmax = imax[row]; 1978 nrow = ailen[row]; 1979 low = 0; 1980 high = nrow; 1981 for (l=0; l<n; l++) { /* loop over added columns */ 1982 if (in[l] < 0) continue; 1983 #if defined(PETSC_USE_DEBUG) 1984 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1985 #endif 1986 col = in[l]; 1987 if (roworiented) { 1988 value = v + (k*(stepval+bs) + l)*bs; 1989 } else { 1990 value = v + (l*(stepval+bs) + k)*bs; 1991 } 1992 if (col <= lastcol) low = 0; else high = nrow; 1993 lastcol = col; 1994 while (high-low > 7) { 1995 t = (low+high)/2; 1996 if (rp[t] > col) high = t; 1997 else low = t; 1998 } 1999 for (i=low; i<high; i++) { 2000 if (rp[i] > col) break; 2001 if (rp[i] == col) { 2002 bap = ap + bs2*i; 2003 if (roworiented) { 2004 if (is == ADD_VALUES) { 2005 for (ii=0; ii<bs; ii++,value+=stepval) { 2006 for (jj=ii; jj<bs2; jj+=bs) { 2007 bap[jj] += *value++; 2008 } 2009 } 2010 } else { 2011 for (ii=0; ii<bs; ii++,value+=stepval) { 2012 for (jj=ii; jj<bs2; jj+=bs) { 2013 bap[jj] = *value++; 2014 } 2015 } 2016 } 2017 } else { 2018 if (is == ADD_VALUES) { 2019 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2020 for (jj=0; jj<bs; jj++) { 2021 bap[jj] += value[jj]; 2022 } 2023 bap += bs; 2024 } 2025 } else { 2026 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2027 for (jj=0; jj<bs; jj++) { 2028 bap[jj] = value[jj]; 2029 } 2030 bap += bs; 2031 } 2032 } 2033 } 2034 goto noinsert2; 2035 } 2036 } 2037 if (nonew == 1) goto noinsert2; 2038 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2039 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2040 N = nrow++ - 1; high++; 2041 /* shift up all the later entries in this row */ 2042 for (ii=N; ii>=i; ii--) { 2043 rp[ii+1] = rp[ii]; 2044 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2045 } 2046 if (N >= i) { 2047 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2048 } 2049 rp[i] = col; 2050 bap = ap + bs2*i; 2051 if (roworiented) { 2052 for (ii=0; ii<bs; ii++,value+=stepval) { 2053 for (jj=ii; jj<bs2; jj+=bs) { 2054 bap[jj] = *value++; 2055 } 2056 } 2057 } else { 2058 for (ii=0; ii<bs; ii++,value+=stepval) { 2059 for (jj=0; jj<bs; jj++) { 2060 *bap++ = *value++; 2061 } 2062 } 2063 } 2064 noinsert2:; 2065 low = i; 2066 } 2067 ailen[row] = nrow; 2068 } 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2074 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2075 { 2076 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2077 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2078 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2079 PetscErrorCode ierr; 2080 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2081 MatScalar *aa = a->a,*ap; 2082 PetscReal ratio=0.6; 2083 2084 PetscFunctionBegin; 2085 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2086 2087 if (m) rmax = ailen[0]; 2088 for (i=1; i<mbs; i++) { 2089 /* move each row back by the amount of empty slots (fshift) before it*/ 2090 fshift += imax[i-1] - ailen[i-1]; 2091 rmax = PetscMax(rmax,ailen[i]); 2092 if (fshift) { 2093 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2094 N = ailen[i]; 2095 for (j=0; j<N; j++) { 2096 ip[j-fshift] = ip[j]; 2097 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2098 } 2099 } 2100 ai[i] = ai[i-1] + ailen[i-1]; 2101 } 2102 if (mbs) { 2103 fshift += imax[mbs-1] - ailen[mbs-1]; 2104 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2105 } 2106 /* reset ilen and imax for each row */ 2107 for (i=0; i<mbs; i++) { 2108 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2109 } 2110 a->nz = ai[mbs]; 2111 2112 /* diagonals may have moved, so kill the diagonal pointers */ 2113 a->idiagvalid = PETSC_FALSE; 2114 if (fshift && a->diag) { 2115 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2116 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2117 a->diag = 0; 2118 } 2119 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2120 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2121 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2122 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2123 A->info.mallocs += a->reallocs; 2124 a->reallocs = 0; 2125 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2126 2127 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 2128 if (a->compressedrow.use){ 2129 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2130 } 2131 2132 A->same_nonzero = PETSC_TRUE; 2133 PetscFunctionReturn(0); 2134 } 2135 2136 /* 2137 This function returns an array of flags which indicate the locations of contiguous 2138 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2139 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2140 Assume: sizes should be long enough to hold all the values. 2141 */ 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2144 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2145 { 2146 PetscInt i,j,k,row; 2147 PetscBool flg; 2148 2149 PetscFunctionBegin; 2150 for (i=0,j=0; i<n; j++) { 2151 row = idx[i]; 2152 if (row%bs!=0) { /* Not the begining of a block */ 2153 sizes[j] = 1; 2154 i++; 2155 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2156 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2157 i++; 2158 } else { /* Begining of the block, so check if the complete block exists */ 2159 flg = PETSC_TRUE; 2160 for (k=1; k<bs; k++) { 2161 if (row+k != idx[i+k]) { /* break in the block */ 2162 flg = PETSC_FALSE; 2163 break; 2164 } 2165 } 2166 if (flg) { /* No break in the bs */ 2167 sizes[j] = bs; 2168 i+= bs; 2169 } else { 2170 sizes[j] = 1; 2171 i++; 2172 } 2173 } 2174 } 2175 *bs_max = j; 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2181 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2182 { 2183 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2184 PetscErrorCode ierr; 2185 PetscInt i,j,k,count,*rows; 2186 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2187 PetscScalar zero = 0.0; 2188 MatScalar *aa; 2189 2190 PetscFunctionBegin; 2191 /* Make a copy of the IS and sort it */ 2192 /* allocate memory for rows,sizes */ 2193 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2194 2195 /* copy IS values to rows, and sort them */ 2196 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2197 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2198 if (baij->keepnonzeropattern) { 2199 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2200 bs_max = is_n; 2201 A->same_nonzero = PETSC_TRUE; 2202 } else { 2203 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2204 A->same_nonzero = PETSC_FALSE; 2205 } 2206 2207 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2208 row = rows[j]; 2209 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2210 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2211 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2212 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2213 if (diag != 0.0) { 2214 if (baij->ilen[row/bs] > 0) { 2215 baij->ilen[row/bs] = 1; 2216 baij->j[baij->i[row/bs]] = row/bs; 2217 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2218 } 2219 /* Now insert all the diagonal values for this bs */ 2220 for (k=0; k<bs; k++) { 2221 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2222 } 2223 } else { /* (diag == 0.0) */ 2224 baij->ilen[row/bs] = 0; 2225 } /* end (diag == 0.0) */ 2226 } else { /* (sizes[i] != bs) */ 2227 #if defined (PETSC_USE_DEBUG) 2228 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2229 #endif 2230 for (k=0; k<count; k++) { 2231 aa[0] = zero; 2232 aa += bs; 2233 } 2234 if (diag != 0.0) { 2235 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2236 } 2237 } 2238 } 2239 2240 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2241 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2247 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2248 { 2249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2250 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2251 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2252 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2253 PetscErrorCode ierr; 2254 PetscInt ridx,cidx,bs2=a->bs2; 2255 PetscBool roworiented=a->roworiented; 2256 MatScalar *ap,value,*aa=a->a,*bap; 2257 2258 PetscFunctionBegin; 2259 if (v) PetscValidScalarPointer(v,6); 2260 for (k=0; k<m; k++) { /* loop over added rows */ 2261 row = im[k]; 2262 brow = row/bs; 2263 if (row < 0) continue; 2264 #if defined(PETSC_USE_DEBUG) 2265 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2266 #endif 2267 rp = aj + ai[brow]; 2268 ap = aa + bs2*ai[brow]; 2269 rmax = imax[brow]; 2270 nrow = ailen[brow]; 2271 low = 0; 2272 high = nrow; 2273 for (l=0; l<n; l++) { /* loop over added columns */ 2274 if (in[l] < 0) continue; 2275 #if defined(PETSC_USE_DEBUG) 2276 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2277 #endif 2278 col = in[l]; bcol = col/bs; 2279 ridx = row % bs; cidx = col % bs; 2280 if (roworiented) { 2281 value = v[l + k*n]; 2282 } else { 2283 value = v[k + l*m]; 2284 } 2285 if (col <= lastcol) low = 0; else high = nrow; 2286 lastcol = col; 2287 while (high-low > 7) { 2288 t = (low+high)/2; 2289 if (rp[t] > bcol) high = t; 2290 else low = t; 2291 } 2292 for (i=low; i<high; i++) { 2293 if (rp[i] > bcol) break; 2294 if (rp[i] == bcol) { 2295 bap = ap + bs2*i + bs*cidx + ridx; 2296 if (is == ADD_VALUES) *bap += value; 2297 else *bap = value; 2298 goto noinsert1; 2299 } 2300 } 2301 if (nonew == 1) goto noinsert1; 2302 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2303 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2304 N = nrow++ - 1; high++; 2305 /* shift up all the later entries in this row */ 2306 for (ii=N; ii>=i; ii--) { 2307 rp[ii+1] = rp[ii]; 2308 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2309 } 2310 if (N>=i) { 2311 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2312 } 2313 rp[i] = bcol; 2314 ap[bs2*i + bs*cidx + ridx] = value; 2315 a->nz++; 2316 noinsert1:; 2317 low = i; 2318 } 2319 ailen[brow] = nrow; 2320 } 2321 A->same_nonzero = PETSC_FALSE; 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2327 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2328 { 2329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2330 Mat outA; 2331 PetscErrorCode ierr; 2332 PetscBool row_identity,col_identity; 2333 2334 PetscFunctionBegin; 2335 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2336 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2337 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2338 if (!row_identity || !col_identity) { 2339 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2340 } 2341 2342 outA = inA; 2343 inA->factortype = MAT_FACTOR_LU; 2344 2345 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2346 2347 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2348 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2349 a->row = row; 2350 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2351 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2352 a->col = col; 2353 2354 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2355 if (a->icol) { 2356 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2357 } 2358 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2359 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2360 2361 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool )(row_identity && col_identity));CHKERRQ(ierr); 2362 if (!a->solve_work) { 2363 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2364 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2365 } 2366 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2367 2368 PetscFunctionReturn(0); 2369 } 2370 2371 EXTERN_C_BEGIN 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2374 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2375 { 2376 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2377 PetscInt i,nz,mbs; 2378 2379 PetscFunctionBegin; 2380 nz = baij->maxnz; 2381 mbs = baij->mbs; 2382 for (i=0; i<nz; i++) { 2383 baij->j[i] = indices[i]; 2384 } 2385 baij->nz = nz; 2386 for (i=0; i<mbs; i++) { 2387 baij->ilen[i] = baij->imax[i]; 2388 } 2389 PetscFunctionReturn(0); 2390 } 2391 EXTERN_C_END 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2395 /*@ 2396 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2397 in the matrix. 2398 2399 Input Parameters: 2400 + mat - the SeqBAIJ matrix 2401 - indices - the column indices 2402 2403 Level: advanced 2404 2405 Notes: 2406 This can be called if you have precomputed the nonzero structure of the 2407 matrix and want to provide it to the matrix object to improve the performance 2408 of the MatSetValues() operation. 2409 2410 You MUST have set the correct numbers of nonzeros per row in the call to 2411 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2412 2413 MUST be called before any calls to MatSetValues(); 2414 2415 @*/ 2416 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2417 { 2418 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2419 2420 PetscFunctionBegin; 2421 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2422 PetscValidPointer(indices,2); 2423 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2424 if (f) { 2425 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2426 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2432 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2433 { 2434 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2435 PetscErrorCode ierr; 2436 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2437 PetscReal atmp; 2438 PetscScalar *x,zero = 0.0; 2439 MatScalar *aa; 2440 PetscInt ncols,brow,krow,kcol; 2441 2442 PetscFunctionBegin; 2443 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2444 bs = A->rmap->bs; 2445 aa = a->a; 2446 ai = a->i; 2447 aj = a->j; 2448 mbs = a->mbs; 2449 2450 ierr = VecSet(v,zero);CHKERRQ(ierr); 2451 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2452 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2453 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2454 for (i=0; i<mbs; i++) { 2455 ncols = ai[1] - ai[0]; ai++; 2456 brow = bs*i; 2457 for (j=0; j<ncols; j++){ 2458 for (kcol=0; kcol<bs; kcol++){ 2459 for (krow=0; krow<bs; krow++){ 2460 atmp = PetscAbsScalar(*aa);aa++; 2461 row = brow + krow; /* row index */ 2462 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2463 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2464 } 2465 } 2466 aj++; 2467 } 2468 } 2469 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatCopy_SeqBAIJ" 2475 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2476 { 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 /* If the two matrices have the same copy implementation, use fast copy. */ 2481 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2484 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2485 2486 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2487 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2488 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2489 } else { 2490 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2491 } 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2497 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2498 { 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2508 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2509 { 2510 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2511 PetscFunctionBegin; 2512 *array = a->a; 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2518 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2519 { 2520 PetscFunctionBegin; 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2526 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2527 { 2528 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2529 PetscErrorCode ierr; 2530 PetscInt i,bs=Y->rmap->bs,j,bs2; 2531 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2532 2533 PetscFunctionBegin; 2534 if (str == SAME_NONZERO_PATTERN) { 2535 PetscScalar alpha = a; 2536 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2537 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2538 if (y->xtoy && y->XtoY != X) { 2539 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2540 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2541 } 2542 if (!y->xtoy) { /* get xtoy */ 2543 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2544 y->XtoY = X; 2545 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2546 } 2547 bs2 = bs*bs; 2548 for (i=0; i<x->nz; i++) { 2549 j = 0; 2550 while (j < bs2){ 2551 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2552 j++; 2553 } 2554 } 2555 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2556 } else { 2557 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2558 } 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2564 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2565 { 2566 PetscInt rbs,cbs; 2567 PetscErrorCode ierr; 2568 2569 PetscFunctionBegin; 2570 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2571 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2572 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2573 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2579 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2580 { 2581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2582 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2583 MatScalar *aa = a->a; 2584 2585 PetscFunctionBegin; 2586 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2592 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2593 { 2594 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2595 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2596 MatScalar *aa = a->a; 2597 2598 PetscFunctionBegin; 2599 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2607 /* 2608 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2609 */ 2610 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2611 { 2612 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2613 PetscErrorCode ierr; 2614 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2615 PetscInt nz = a->i[m],row,*jj,mr,col; 2616 2617 PetscFunctionBegin; 2618 *nn = n; 2619 if (!ia) PetscFunctionReturn(0); 2620 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2621 else { 2622 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2623 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2624 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2625 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2626 jj = a->j; 2627 for (i=0; i<nz; i++) { 2628 collengths[jj[i]]++; 2629 } 2630 cia[0] = oshift; 2631 for (i=0; i<n; i++) { 2632 cia[i+1] = cia[i] + collengths[i]; 2633 } 2634 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2635 jj = a->j; 2636 for (row=0; row<m; row++) { 2637 mr = a->i[row+1] - a->i[row]; 2638 for (i=0; i<mr; i++) { 2639 col = *jj++; 2640 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2641 } 2642 } 2643 ierr = PetscFree(collengths);CHKERRQ(ierr); 2644 *ia = cia; *ja = cja; 2645 } 2646 PetscFunctionReturn(0); 2647 } 2648 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2651 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2652 { 2653 PetscErrorCode ierr; 2654 2655 PetscFunctionBegin; 2656 if (!ia) PetscFunctionReturn(0); 2657 ierr = PetscFree(*ia);CHKERRQ(ierr); 2658 ierr = PetscFree(*ja);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2664 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2665 { 2666 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2667 PetscErrorCode ierr; 2668 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2669 PetscScalar dx,*y,*xx,*w3_array; 2670 PetscScalar *vscale_array; 2671 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2672 Vec w1=coloring->w1,w2=coloring->w2,w3; 2673 void *fctx = coloring->fctx; 2674 PetscBool flg = PETSC_FALSE; 2675 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2676 Vec x1_tmp; 2677 2678 PetscFunctionBegin; 2679 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2680 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2681 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2682 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2683 2684 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2685 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2686 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2687 if (flg) { 2688 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2689 } else { 2690 PetscBool assembled; 2691 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2692 if (assembled) { 2693 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2694 } 2695 } 2696 2697 x1_tmp = x1; 2698 if (!coloring->vscale){ 2699 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2700 } 2701 2702 /* 2703 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2704 coloring->F for the coarser grids from the finest 2705 */ 2706 if (coloring->F) { 2707 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2708 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2709 if (m1 != m2) { 2710 coloring->F = 0; 2711 } 2712 } 2713 2714 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2715 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2716 } 2717 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2718 2719 /* Set w1 = F(x1) */ 2720 if (coloring->F) { 2721 w1 = coloring->F; /* use already computed value of function */ 2722 coloring->F = 0; 2723 } else { 2724 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2725 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2726 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2727 } 2728 2729 if (!coloring->w3) { 2730 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2731 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2732 } 2733 w3 = coloring->w3; 2734 2735 CHKMEMQ; 2736 /* Compute all the local scale factors, including ghost points */ 2737 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2738 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2739 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2740 if (ctype == IS_COLORING_GHOSTED){ 2741 col_start = 0; col_end = N; 2742 } else if (ctype == IS_COLORING_GLOBAL){ 2743 xx = xx - start; 2744 vscale_array = vscale_array - start; 2745 col_start = start; col_end = N + start; 2746 } CHKMEMQ; 2747 for (col=col_start; col<col_end; col++){ 2748 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2749 if (coloring->htype[0] == 'w') { 2750 dx = 1.0 + unorm; 2751 } else { 2752 dx = xx[col]; 2753 } 2754 if (dx == 0.0) dx = 1.0; 2755 #if !defined(PETSC_USE_COMPLEX) 2756 if (dx < umin && dx >= 0.0) dx = umin; 2757 else if (dx < 0.0 && dx > -umin) dx = -umin; 2758 #else 2759 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2760 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2761 #endif 2762 dx *= epsilon; 2763 vscale_array[col] = 1.0/dx; 2764 } CHKMEMQ; 2765 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2766 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2767 if (ctype == IS_COLORING_GLOBAL){ 2768 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2769 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2770 } 2771 CHKMEMQ; 2772 if (coloring->vscaleforrow) { 2773 vscaleforrow = coloring->vscaleforrow; 2774 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2775 2776 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2777 /* 2778 Loop over each color 2779 */ 2780 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2781 for (k=0; k<coloring->ncolors; k++) { 2782 coloring->currentcolor = k; 2783 for (i=0; i<bs; i++) { 2784 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2785 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2786 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2787 /* 2788 Loop over each column associated with color 2789 adding the perturbation to the vector w3. 2790 */ 2791 for (l=0; l<coloring->ncolumns[k]; l++) { 2792 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2793 if (coloring->htype[0] == 'w') { 2794 dx = 1.0 + unorm; 2795 } else { 2796 dx = xx[col]; 2797 } 2798 if (dx == 0.0) dx = 1.0; 2799 #if !defined(PETSC_USE_COMPLEX) 2800 if (dx < umin && dx >= 0.0) dx = umin; 2801 else if (dx < 0.0 && dx > -umin) dx = -umin; 2802 #else 2803 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2804 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2805 #endif 2806 dx *= epsilon; 2807 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2808 w3_array[col] += dx; 2809 } 2810 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2811 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2812 2813 /* 2814 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2815 w2 = F(x1 + dx) - F(x1) 2816 */ 2817 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2818 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2819 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2820 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2821 2822 /* 2823 Loop over rows of vector, putting results into Jacobian matrix 2824 */ 2825 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2826 for (l=0; l<coloring->nrows[k]; l++) { 2827 row = bs*coloring->rows[k][l]; /* local row index */ 2828 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2829 for (j=0; j<bs; j++) { 2830 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2831 srows[j] = row + start + j; 2832 } 2833 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2834 } 2835 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2836 } 2837 } /* endof for each color */ 2838 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2839 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2840 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2841 ierr = PetscFree(srows);CHKERRQ(ierr); 2842 2843 coloring->currentcolor = -1; 2844 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2845 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2846 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2847 PetscFunctionReturn(0); 2848 } 2849 2850 /* -------------------------------------------------------------------*/ 2851 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2852 MatGetRow_SeqBAIJ, 2853 MatRestoreRow_SeqBAIJ, 2854 MatMult_SeqBAIJ_N, 2855 /* 4*/ MatMultAdd_SeqBAIJ_N, 2856 MatMultTranspose_SeqBAIJ, 2857 MatMultTransposeAdd_SeqBAIJ, 2858 0, 2859 0, 2860 0, 2861 /*10*/ 0, 2862 MatLUFactor_SeqBAIJ, 2863 0, 2864 0, 2865 MatTranspose_SeqBAIJ, 2866 /*15*/ MatGetInfo_SeqBAIJ, 2867 MatEqual_SeqBAIJ, 2868 MatGetDiagonal_SeqBAIJ, 2869 MatDiagonalScale_SeqBAIJ, 2870 MatNorm_SeqBAIJ, 2871 /*20*/ 0, 2872 MatAssemblyEnd_SeqBAIJ, 2873 MatSetOption_SeqBAIJ, 2874 MatZeroEntries_SeqBAIJ, 2875 /*24*/ MatZeroRows_SeqBAIJ, 2876 0, 2877 0, 2878 0, 2879 0, 2880 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2881 0, 2882 0, 2883 MatGetArray_SeqBAIJ, 2884 MatRestoreArray_SeqBAIJ, 2885 /*34*/ MatDuplicate_SeqBAIJ, 2886 0, 2887 0, 2888 MatILUFactor_SeqBAIJ, 2889 0, 2890 /*39*/ MatAXPY_SeqBAIJ, 2891 MatGetSubMatrices_SeqBAIJ, 2892 MatIncreaseOverlap_SeqBAIJ, 2893 MatGetValues_SeqBAIJ, 2894 MatCopy_SeqBAIJ, 2895 /*44*/ 0, 2896 MatScale_SeqBAIJ, 2897 0, 2898 0, 2899 0, 2900 /*49*/ MatSetBlockSize_SeqBAIJ, 2901 MatGetRowIJ_SeqBAIJ, 2902 MatRestoreRowIJ_SeqBAIJ, 2903 MatGetColumnIJ_SeqBAIJ, 2904 MatRestoreColumnIJ_SeqBAIJ, 2905 /*54*/ MatFDColoringCreate_SeqAIJ, 2906 0, 2907 0, 2908 0, 2909 MatSetValuesBlocked_SeqBAIJ, 2910 /*59*/ MatGetSubMatrix_SeqBAIJ, 2911 MatDestroy_SeqBAIJ, 2912 MatView_SeqBAIJ, 2913 0, 2914 0, 2915 /*64*/ 0, 2916 0, 2917 0, 2918 0, 2919 0, 2920 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2921 0, 2922 MatConvert_Basic, 2923 0, 2924 0, 2925 /*74*/ 0, 2926 MatFDColoringApply_BAIJ, 2927 0, 2928 0, 2929 0, 2930 /*79*/ 0, 2931 0, 2932 0, 2933 0, 2934 MatLoad_SeqBAIJ, 2935 /*84*/ 0, 2936 0, 2937 0, 2938 0, 2939 0, 2940 /*89*/ 0, 2941 0, 2942 0, 2943 0, 2944 0, 2945 /*94*/ 0, 2946 0, 2947 0, 2948 0, 2949 0, 2950 /*99*/0, 2951 0, 2952 0, 2953 0, 2954 0, 2955 /*104*/0, 2956 MatRealPart_SeqBAIJ, 2957 MatImaginaryPart_SeqBAIJ, 2958 0, 2959 0, 2960 /*109*/0, 2961 0, 2962 0, 2963 0, 2964 MatMissingDiagonal_SeqBAIJ, 2965 /*114*/0, 2966 0, 2967 0, 2968 0, 2969 0, 2970 /*119*/0, 2971 0, 2972 MatMultHermitianTranspose_SeqBAIJ, 2973 MatMultHermitianTransposeAdd_SeqBAIJ, 2974 0, 2975 }; 2976 2977 EXTERN_C_BEGIN 2978 #undef __FUNCT__ 2979 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2980 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2981 { 2982 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2983 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2988 2989 /* allocate space for values if not already there */ 2990 if (!aij->saved_values) { 2991 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2992 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2993 } 2994 2995 /* copy values over */ 2996 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2997 PetscFunctionReturn(0); 2998 } 2999 EXTERN_C_END 3000 3001 EXTERN_C_BEGIN 3002 #undef __FUNCT__ 3003 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3004 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 3005 { 3006 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3007 PetscErrorCode ierr; 3008 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3009 3010 PetscFunctionBegin; 3011 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3012 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3013 3014 /* copy values over */ 3015 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3016 PetscFunctionReturn(0); 3017 } 3018 EXTERN_C_END 3019 3020 EXTERN_C_BEGIN 3021 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3022 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3023 EXTERN_C_END 3024 3025 EXTERN_C_BEGIN 3026 #undef __FUNCT__ 3027 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3028 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3029 { 3030 Mat_SeqBAIJ *b; 3031 PetscErrorCode ierr; 3032 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3033 PetscBool flg,skipallocation = PETSC_FALSE; 3034 3035 PetscFunctionBegin; 3036 3037 if (nz == MAT_SKIP_ALLOCATION) { 3038 skipallocation = PETSC_TRUE; 3039 nz = 0; 3040 } 3041 3042 if (bs < 0) { 3043 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3044 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3045 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3046 bs = PetscAbs(bs); 3047 } 3048 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3049 bs = newbs; 3050 3051 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3052 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3053 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3054 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3055 3056 B->preallocated = PETSC_TRUE; 3057 3058 mbs = B->rmap->n/bs; 3059 nbs = B->cmap->n/bs; 3060 bs2 = bs*bs; 3061 3062 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3063 3064 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3065 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3066 if (nnz) { 3067 for (i=0; i<mbs; i++) { 3068 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3069 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3070 } 3071 } 3072 3073 b = (Mat_SeqBAIJ*)B->data; 3074 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3075 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3076 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3077 3078 if (!flg) { 3079 switch (bs) { 3080 case 1: 3081 B->ops->mult = MatMult_SeqBAIJ_1; 3082 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3083 B->ops->sor = MatSOR_SeqBAIJ_1; 3084 break; 3085 case 2: 3086 B->ops->mult = MatMult_SeqBAIJ_2; 3087 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3088 B->ops->sor = MatSOR_SeqBAIJ_2; 3089 break; 3090 case 3: 3091 B->ops->mult = MatMult_SeqBAIJ_3; 3092 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3093 B->ops->sor = MatSOR_SeqBAIJ_3; 3094 break; 3095 case 4: 3096 B->ops->mult = MatMult_SeqBAIJ_4; 3097 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3098 B->ops->sor = MatSOR_SeqBAIJ_4; 3099 break; 3100 case 5: 3101 B->ops->mult = MatMult_SeqBAIJ_5; 3102 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3103 B->ops->sor = MatSOR_SeqBAIJ_5; 3104 break; 3105 case 6: 3106 B->ops->mult = MatMult_SeqBAIJ_6; 3107 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3108 B->ops->sor = MatSOR_SeqBAIJ_6; 3109 break; 3110 case 7: 3111 B->ops->mult = MatMult_SeqBAIJ_7; 3112 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3113 B->ops->sor = MatSOR_SeqBAIJ_7; 3114 break; 3115 case 15: 3116 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3117 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3118 B->ops->sor = MatSOR_SeqBAIJ_N; 3119 break; 3120 default: 3121 B->ops->mult = MatMult_SeqBAIJ_N; 3122 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3123 B->ops->sor = MatSOR_SeqBAIJ_N; 3124 break; 3125 } 3126 } 3127 B->rmap->bs = bs; 3128 b->mbs = mbs; 3129 b->nbs = nbs; 3130 if (!skipallocation) { 3131 if (!b->imax) { 3132 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3133 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3134 b->free_imax_ilen = PETSC_TRUE; 3135 } 3136 /* b->ilen will count nonzeros in each block row so far. */ 3137 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3138 if (!nnz) { 3139 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3140 else if (nz < 0) nz = 1; 3141 for (i=0; i<mbs; i++) b->imax[i] = nz; 3142 nz = nz*mbs; 3143 } else { 3144 nz = 0; 3145 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3146 } 3147 3148 /* allocate the matrix space */ 3149 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3150 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3151 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3152 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3153 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3154 b->singlemalloc = PETSC_TRUE; 3155 b->i[0] = 0; 3156 for (i=1; i<mbs+1; i++) { 3157 b->i[i] = b->i[i-1] + b->imax[i-1]; 3158 } 3159 b->free_a = PETSC_TRUE; 3160 b->free_ij = PETSC_TRUE; 3161 } else { 3162 b->free_a = PETSC_FALSE; 3163 b->free_ij = PETSC_FALSE; 3164 } 3165 3166 B->rmap->bs = bs; 3167 b->bs2 = bs2; 3168 b->mbs = mbs; 3169 b->nz = 0; 3170 b->maxnz = nz; 3171 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3172 PetscFunctionReturn(0); 3173 } 3174 EXTERN_C_END 3175 3176 EXTERN_C_BEGIN 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3179 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3180 { 3181 PetscInt i,m,nz,nz_max=0,*nnz; 3182 PetscScalar *values=0; 3183 PetscErrorCode ierr; 3184 3185 PetscFunctionBegin; 3186 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3187 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3188 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3189 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3190 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3191 m = B->rmap->n/bs; 3192 3193 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3194 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3195 for(i=0; i<m; i++) { 3196 nz = ii[i+1]- ii[i]; 3197 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3198 nz_max = PetscMax(nz_max, nz); 3199 nnz[i] = nz; 3200 } 3201 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3202 ierr = PetscFree(nnz);CHKERRQ(ierr); 3203 3204 values = (PetscScalar*)V; 3205 if (!values) { 3206 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3207 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3208 } 3209 for (i=0; i<m; i++) { 3210 PetscInt ncols = ii[i+1] - ii[i]; 3211 const PetscInt *icols = jj + ii[i]; 3212 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3213 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3214 } 3215 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3216 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3217 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3218 3219 PetscFunctionReturn(0); 3220 } 3221 EXTERN_C_END 3222 3223 3224 EXTERN_C_BEGIN 3225 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3226 #if defined(PETSC_HAVE_MUMPS) 3227 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3228 #endif 3229 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3230 EXTERN_C_END 3231 3232 /*MC 3233 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3234 block sparse compressed row format. 3235 3236 Options Database Keys: 3237 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3238 3239 Level: beginner 3240 3241 .seealso: MatCreateSeqBAIJ() 3242 M*/ 3243 3244 3245 EXTERN_C_BEGIN 3246 #undef __FUNCT__ 3247 #define __FUNCT__ "MatCreate_SeqBAIJ" 3248 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3249 { 3250 PetscErrorCode ierr; 3251 PetscMPIInt size; 3252 Mat_SeqBAIJ *b; 3253 3254 PetscFunctionBegin; 3255 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3256 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3257 3258 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3259 B->data = (void*)b; 3260 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3261 B->mapping = 0; 3262 b->row = 0; 3263 b->col = 0; 3264 b->icol = 0; 3265 b->reallocs = 0; 3266 b->saved_values = 0; 3267 3268 b->roworiented = PETSC_TRUE; 3269 b->nonew = 0; 3270 b->diag = 0; 3271 b->solve_work = 0; 3272 b->mult_work = 0; 3273 B->spptr = 0; 3274 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3275 b->keepnonzeropattern = PETSC_FALSE; 3276 b->xtoy = 0; 3277 b->XtoY = 0; 3278 b->compressedrow.use = PETSC_FALSE; 3279 b->compressedrow.nrows = 0; 3280 b->compressedrow.i = PETSC_NULL; 3281 b->compressedrow.rindex = PETSC_NULL; 3282 b->compressedrow.checked = PETSC_FALSE; 3283 B->same_nonzero = PETSC_FALSE; 3284 3285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3286 "MatGetFactorAvailable_seqbaij_petsc", 3287 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3288 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3289 "MatGetFactor_seqbaij_petsc", 3290 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3291 #if defined(PETSC_HAVE_MUMPS) 3292 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3293 #endif 3294 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3295 "MatInvertBlockDiagonal_SeqBAIJ", 3296 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3297 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3298 "MatStoreValues_SeqBAIJ", 3299 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3300 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3301 "MatRetrieveValues_SeqBAIJ", 3302 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3303 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3304 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3305 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3306 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3307 "MatConvert_SeqBAIJ_SeqAIJ", 3308 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3309 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3310 "MatConvert_SeqBAIJ_SeqSBAIJ", 3311 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3312 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3313 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3314 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3315 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3316 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3317 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3318 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3319 PetscFunctionReturn(0); 3320 } 3321 EXTERN_C_END 3322 3323 #undef __FUNCT__ 3324 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3325 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3326 { 3327 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3328 PetscErrorCode ierr; 3329 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3330 3331 PetscFunctionBegin; 3332 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3333 3334 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3335 c->imax = a->imax; 3336 c->ilen = a->ilen; 3337 c->free_imax_ilen = PETSC_FALSE; 3338 } else { 3339 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3340 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3341 for (i=0; i<mbs; i++) { 3342 c->imax[i] = a->imax[i]; 3343 c->ilen[i] = a->ilen[i]; 3344 } 3345 c->free_imax_ilen = PETSC_TRUE; 3346 } 3347 3348 /* allocate the matrix space */ 3349 if (mallocmatspace){ 3350 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3351 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3352 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3353 c->singlemalloc = PETSC_FALSE; 3354 c->free_ij = PETSC_FALSE; 3355 c->i = a->i; 3356 c->j = a->j; 3357 c->parent = A; 3358 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3359 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3360 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3361 } else { 3362 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3363 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3364 c->singlemalloc = PETSC_TRUE; 3365 c->free_ij = PETSC_TRUE; 3366 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3367 if (mbs > 0) { 3368 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3369 if (cpvalues == MAT_COPY_VALUES) { 3370 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3371 } else { 3372 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3373 } 3374 } 3375 } 3376 } 3377 3378 c->roworiented = a->roworiented; 3379 c->nonew = a->nonew; 3380 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3381 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3382 c->bs2 = a->bs2; 3383 c->mbs = a->mbs; 3384 c->nbs = a->nbs; 3385 3386 if (a->diag) { 3387 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3388 c->diag = a->diag; 3389 c->free_diag = PETSC_FALSE; 3390 } else { 3391 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3392 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3393 for (i=0; i<mbs; i++) { 3394 c->diag[i] = a->diag[i]; 3395 } 3396 c->free_diag = PETSC_TRUE; 3397 } 3398 } else c->diag = 0; 3399 c->nz = a->nz; 3400 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3401 c->solve_work = 0; 3402 c->mult_work = 0; 3403 c->free_a = PETSC_TRUE; 3404 c->free_ij = PETSC_TRUE; 3405 C->preallocated = PETSC_TRUE; 3406 C->assembled = PETSC_TRUE; 3407 3408 c->compressedrow.use = a->compressedrow.use; 3409 c->compressedrow.nrows = a->compressedrow.nrows; 3410 c->compressedrow.checked = a->compressedrow.checked; 3411 if (a->compressedrow.checked && a->compressedrow.use){ 3412 i = a->compressedrow.nrows; 3413 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3414 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3415 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3416 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3417 } else { 3418 c->compressedrow.use = PETSC_FALSE; 3419 c->compressedrow.i = PETSC_NULL; 3420 c->compressedrow.rindex = PETSC_NULL; 3421 } 3422 C->same_nonzero = A->same_nonzero; 3423 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3424 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3425 PetscFunctionReturn(0); 3426 } 3427 3428 #undef __FUNCT__ 3429 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3430 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3431 { 3432 PetscErrorCode ierr; 3433 3434 PetscFunctionBegin; 3435 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3436 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3437 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3438 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3439 PetscFunctionReturn(0); 3440 } 3441 3442 #undef __FUNCT__ 3443 #define __FUNCT__ "MatLoad_SeqBAIJ" 3444 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3445 { 3446 Mat_SeqBAIJ *a; 3447 PetscErrorCode ierr; 3448 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3449 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3450 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3451 PetscInt *masked,nmask,tmp,bs2,ishift; 3452 PetscMPIInt size; 3453 int fd; 3454 PetscScalar *aa; 3455 MPI_Comm comm = ((PetscObject)viewer)->comm; 3456 3457 PetscFunctionBegin; 3458 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3459 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3460 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3461 bs2 = bs*bs; 3462 3463 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3464 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3465 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3466 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3467 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3468 M = header[1]; N = header[2]; nz = header[3]; 3469 3470 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3471 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3472 3473 /* 3474 This code adds extra rows to make sure the number of rows is 3475 divisible by the blocksize 3476 */ 3477 mbs = M/bs; 3478 extra_rows = bs - M + bs*(mbs); 3479 if (extra_rows == bs) extra_rows = 0; 3480 else mbs++; 3481 if (extra_rows) { 3482 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3483 } 3484 3485 /* Set global sizes if not already set */ 3486 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3487 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3488 } else { /* Check if the matrix global sizes are correct */ 3489 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3490 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3491 } 3492 3493 /* read in row lengths */ 3494 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3495 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3496 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3497 3498 /* read in column indices */ 3499 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3500 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3501 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3502 3503 /* loop over row lengths determining block row lengths */ 3504 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3505 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3506 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3507 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3508 rowcount = 0; 3509 nzcount = 0; 3510 for (i=0; i<mbs; i++) { 3511 nmask = 0; 3512 for (j=0; j<bs; j++) { 3513 kmax = rowlengths[rowcount]; 3514 for (k=0; k<kmax; k++) { 3515 tmp = jj[nzcount++]/bs; 3516 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3517 } 3518 rowcount++; 3519 } 3520 browlengths[i] += nmask; 3521 /* zero out the mask elements we set */ 3522 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3523 } 3524 3525 /* Do preallocation */ 3526 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3527 a = (Mat_SeqBAIJ*)newmat->data; 3528 3529 /* set matrix "i" values */ 3530 a->i[0] = 0; 3531 for (i=1; i<= mbs; i++) { 3532 a->i[i] = a->i[i-1] + browlengths[i-1]; 3533 a->ilen[i-1] = browlengths[i-1]; 3534 } 3535 a->nz = 0; 3536 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3537 3538 /* read in nonzero values */ 3539 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3540 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3541 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3542 3543 /* set "a" and "j" values into matrix */ 3544 nzcount = 0; jcount = 0; 3545 for (i=0; i<mbs; i++) { 3546 nzcountb = nzcount; 3547 nmask = 0; 3548 for (j=0; j<bs; j++) { 3549 kmax = rowlengths[i*bs+j]; 3550 for (k=0; k<kmax; k++) { 3551 tmp = jj[nzcount++]/bs; 3552 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3553 } 3554 } 3555 /* sort the masked values */ 3556 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3557 3558 /* set "j" values into matrix */ 3559 maskcount = 1; 3560 for (j=0; j<nmask; j++) { 3561 a->j[jcount++] = masked[j]; 3562 mask[masked[j]] = maskcount++; 3563 } 3564 /* set "a" values into matrix */ 3565 ishift = bs2*a->i[i]; 3566 for (j=0; j<bs; j++) { 3567 kmax = rowlengths[i*bs+j]; 3568 for (k=0; k<kmax; k++) { 3569 tmp = jj[nzcountb]/bs ; 3570 block = mask[tmp] - 1; 3571 point = jj[nzcountb] - bs*tmp; 3572 idx = ishift + bs2*block + j + bs*point; 3573 a->a[idx] = (MatScalar)aa[nzcountb++]; 3574 } 3575 } 3576 /* zero out the mask elements we set */ 3577 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3578 } 3579 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3580 3581 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3582 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3583 ierr = PetscFree(aa);CHKERRQ(ierr); 3584 ierr = PetscFree(jj);CHKERRQ(ierr); 3585 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3586 3587 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3588 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3589 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3590 PetscFunctionReturn(0); 3591 } 3592 3593 #undef __FUNCT__ 3594 #define __FUNCT__ "MatCreateSeqBAIJ" 3595 /*@C 3596 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3597 compressed row) format. For good matrix assembly performance the 3598 user should preallocate the matrix storage by setting the parameter nz 3599 (or the array nnz). By setting these parameters accurately, performance 3600 during matrix assembly can be increased by more than a factor of 50. 3601 3602 Collective on MPI_Comm 3603 3604 Input Parameters: 3605 + comm - MPI communicator, set to PETSC_COMM_SELF 3606 . bs - size of block 3607 . m - number of rows 3608 . n - number of columns 3609 . nz - number of nonzero blocks per block row (same for all rows) 3610 - nnz - array containing the number of nonzero blocks in the various block rows 3611 (possibly different for each block row) or PETSC_NULL 3612 3613 Output Parameter: 3614 . A - the matrix 3615 3616 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3617 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3618 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3619 3620 Options Database Keys: 3621 . -mat_no_unroll - uses code that does not unroll the loops in the 3622 block calculations (much slower) 3623 . -mat_block_size - size of the blocks to use 3624 3625 Level: intermediate 3626 3627 Notes: 3628 The number of rows and columns must be divisible by blocksize. 3629 3630 If the nnz parameter is given then the nz parameter is ignored 3631 3632 A nonzero block is any block that as 1 or more nonzeros in it 3633 3634 The block AIJ format is fully compatible with standard Fortran 77 3635 storage. That is, the stored row and column indices can begin at 3636 either one (as in Fortran) or zero. See the users' manual for details. 3637 3638 Specify the preallocated storage with either nz or nnz (not both). 3639 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3640 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3641 matrices. 3642 3643 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3644 @*/ 3645 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3646 { 3647 PetscErrorCode ierr; 3648 3649 PetscFunctionBegin; 3650 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3651 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3652 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3653 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3654 PetscFunctionReturn(0); 3655 } 3656 3657 #undef __FUNCT__ 3658 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3659 /*@C 3660 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3661 per row in the matrix. For good matrix assembly performance the 3662 user should preallocate the matrix storage by setting the parameter nz 3663 (or the array nnz). By setting these parameters accurately, performance 3664 during matrix assembly can be increased by more than a factor of 50. 3665 3666 Collective on MPI_Comm 3667 3668 Input Parameters: 3669 + A - the matrix 3670 . bs - size of block 3671 . nz - number of block nonzeros per block row (same for all rows) 3672 - nnz - array containing the number of block nonzeros in the various block rows 3673 (possibly different for each block row) or PETSC_NULL 3674 3675 Options Database Keys: 3676 . -mat_no_unroll - uses code that does not unroll the loops in the 3677 block calculations (much slower) 3678 . -mat_block_size - size of the blocks to use 3679 3680 Level: intermediate 3681 3682 Notes: 3683 If the nnz parameter is given then the nz parameter is ignored 3684 3685 You can call MatGetInfo() to get information on how effective the preallocation was; 3686 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3687 You can also run with the option -info and look for messages with the string 3688 malloc in them to see if additional memory allocation was needed. 3689 3690 The block AIJ format is fully compatible with standard Fortran 77 3691 storage. That is, the stored row and column indices can begin at 3692 either one (as in Fortran) or zero. See the users' manual for details. 3693 3694 Specify the preallocated storage with either nz or nnz (not both). 3695 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3696 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3697 3698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3699 @*/ 3700 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3701 { 3702 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3703 3704 PetscFunctionBegin; 3705 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3706 if (f) { 3707 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3708 } 3709 PetscFunctionReturn(0); 3710 } 3711 3712 #undef __FUNCT__ 3713 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3714 /*@C 3715 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3716 (the default sequential PETSc format). 3717 3718 Collective on MPI_Comm 3719 3720 Input Parameters: 3721 + A - the matrix 3722 . i - the indices into j for the start of each local row (starts with zero) 3723 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3724 - v - optional values in the matrix 3725 3726 Level: developer 3727 3728 .keywords: matrix, aij, compressed row, sparse 3729 3730 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3731 @*/ 3732 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3733 { 3734 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3735 3736 PetscFunctionBegin; 3737 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3738 if (f) { 3739 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3740 } 3741 PetscFunctionReturn(0); 3742 } 3743 3744 3745 #undef __FUNCT__ 3746 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3747 /*@ 3748 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3749 3750 Collective on MPI_Comm 3751 3752 Input Parameters: 3753 + comm - must be an MPI communicator of size 1 3754 . bs - size of block 3755 . m - number of rows 3756 . n - number of columns 3757 . i - row indices 3758 . j - column indices 3759 - a - matrix values 3760 3761 Output Parameter: 3762 . mat - the matrix 3763 3764 Level: advanced 3765 3766 Notes: 3767 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3768 once the matrix is destroyed 3769 3770 You cannot set new nonzero locations into this matrix, that will generate an error. 3771 3772 The i and j indices are 0 based 3773 3774 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3775 3776 3777 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3778 3779 @*/ 3780 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3781 { 3782 PetscErrorCode ierr; 3783 PetscInt ii; 3784 Mat_SeqBAIJ *baij; 3785 3786 PetscFunctionBegin; 3787 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3788 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3789 3790 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3791 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3792 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3793 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3794 baij = (Mat_SeqBAIJ*)(*mat)->data; 3795 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3796 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3797 3798 baij->i = i; 3799 baij->j = j; 3800 baij->a = a; 3801 baij->singlemalloc = PETSC_FALSE; 3802 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3803 baij->free_a = PETSC_FALSE; 3804 baij->free_ij = PETSC_FALSE; 3805 3806 for (ii=0; ii<m; ii++) { 3807 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3808 #if defined(PETSC_USE_DEBUG) 3809 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3810 #endif 3811 } 3812 #if defined(PETSC_USE_DEBUG) 3813 for (ii=0; ii<baij->i[m]; ii++) { 3814 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3815 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3816 } 3817 #endif 3818 3819 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3820 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3821 PetscFunctionReturn(0); 3822 } 3823