1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 12 /*@ 13 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 14 15 Collective on Mat 16 17 Input Parameters: 18 . mat - the matrix 19 20 Level: advanced 21 @*/ 22 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 23 { 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 28 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 29 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 30 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 31 PetscFunctionReturn(0); 32 } 33 34 EXTERN_C_BEGIN 35 #undef __FUNCT__ 36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 37 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 40 PetscErrorCode ierr; 41 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 42 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 43 PetscReal shift = 0.0; 44 45 PetscFunctionBegin; 46 if (a->idiagvalid) PetscFunctionReturn(0); 47 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 48 diag_offset = a->diag; 49 if (!a->idiag) { 50 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 51 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 52 } 53 diag = a->idiag; 54 mdiag = a->idiag+bs2*mbs; 55 /* factor and invert each block */ 56 switch (bs){ 57 case 1: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 1*diag_offset[i]; 60 diag[0] = odiag[0]; 61 mdiag[0] = odiag[0]; 62 diag[0] = 1.0 / (diag[0] + shift); 63 diag += 1; 64 mdiag += 1; 65 } 66 break; 67 case 2: 68 for (i=0; i<mbs; i++) { 69 odiag = v + 4*diag_offset[i]; 70 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 71 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 72 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 73 diag += 4; 74 mdiag += 4; 75 } 76 break; 77 case 3: 78 for (i=0; i<mbs; i++) { 79 odiag = v + 9*diag_offset[i]; 80 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 81 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 82 diag[8] = odiag[8]; 83 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 84 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 85 mdiag[8] = odiag[8]; 86 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 87 diag += 9; 88 mdiag += 9; 89 } 90 break; 91 case 4: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 16*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 97 diag += 16; 98 mdiag += 16; 99 } 100 break; 101 case 5: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 25*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 107 diag += 25; 108 mdiag += 25; 109 } 110 break; 111 case 6: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 36*diag_offset[i]; 114 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 127 diag += 49; 128 mdiag += 49; 129 } 130 break; 131 default: 132 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 133 for (i=0; i<mbs; i++) { 134 odiag = v + bs2*diag_offset[i]; 135 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 EXTERN_C_END 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 151 { 152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 153 PetscScalar *x,x1,s1; 154 const PetscScalar *b; 155 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,j; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 its = its*lits; 163 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 164 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 165 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 166 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 167 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 168 169 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 170 171 diag = a->diag; 172 idiag = a->idiag; 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 178 x[0] = b[0]*idiag[0]; 179 i2 = 1; 180 idiag += 1; 181 for (i=1; i<m; i++) { 182 v = aa + ai[i]; 183 vi = aj + ai[i]; 184 nz = diag[i] - ai[i]; 185 s1 = b[i2]; 186 for (j=0; j<nz; j++) { 187 s1 -= v[j]*x[vi[j]]; 188 } 189 x[i2] = idiag[0]*s1; 190 idiag += 1; 191 i2 += 1; 192 } 193 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 194 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 195 } 196 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 197 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 198 i2 = 0; 199 mdiag = a->idiag+a->mbs; 200 for (i=0; i<m; i++) { 201 x1 = x[i2]; 202 x[i2] = mdiag[0]*x1; 203 mdiag += 1; 204 i2 += 1; 205 } 206 ierr = PetscLogFlops(m);CHKERRQ(ierr); 207 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 208 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 209 } 210 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 211 idiag = a->idiag+a->mbs - 1; 212 i2 = m - 1; 213 x1 = x[i2]; 214 x[i2] = idiag[0]*x1; 215 idiag -= 1; 216 i2 -= 1; 217 for (i=m-2; i>=0; i--) { 218 v = aa + (diag[i]+1); 219 vi = aj + diag[i] + 1; 220 nz = ai[i+1] - diag[i] - 1; 221 s1 = x[i2]; 222 for (j=0; j<nz; j++) { 223 s1 -= v[j]*x[vi[j]]; 224 } 225 x[i2] = idiag[0]*s1; 226 idiag -= 1; 227 i2 -= 1; 228 } 229 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 230 } 231 } else { 232 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 233 } 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 PetscScalar *x,x1,x2,s1,s2; 245 const PetscScalar *b; 246 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 247 PetscErrorCode ierr; 248 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 249 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 250 251 PetscFunctionBegin; 252 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 253 its = its*lits; 254 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 255 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 256 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 257 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 258 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 259 260 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 261 262 diag = a->diag; 263 idiag = a->idiag; 264 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 266 267 if (flag & SOR_ZERO_INITIAL_GUESS) { 268 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 269 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 270 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 271 i2 = 2; 272 idiag += 4; 273 for (i=1; i<m; i++) { 274 v = aa + 4*ai[i]; 275 vi = aj + ai[i]; 276 nz = diag[i] - ai[i]; 277 s1 = b[i2]; s2 = b[i2+1]; 278 for (j=0; j<nz; j++) { 279 idx = 2*vi[j]; 280 it = 4*j; 281 x1 = x[idx]; x2 = x[1+idx]; 282 s1 -= v[it]*x1 + v[it+2]*x2; 283 s2 -= v[it+1]*x1 + v[it+3]*x2; 284 } 285 x[i2] = idiag[0]*s1 + idiag[2]*s2; 286 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 287 idiag += 4; 288 i2 += 2; 289 } 290 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 291 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 292 } 293 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 294 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 295 i2 = 0; 296 mdiag = a->idiag+4*a->mbs; 297 for (i=0; i<m; i++) { 298 x1 = x[i2]; x2 = x[i2+1]; 299 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 300 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 301 mdiag += 4; 302 i2 += 2; 303 } 304 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 305 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 306 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 307 } 308 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 309 idiag = a->idiag+4*a->mbs - 4; 310 i2 = 2*m - 2; 311 x1 = x[i2]; x2 = x[i2+1]; 312 x[i2] = idiag[0]*x1 + idiag[2]*x2; 313 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 314 idiag -= 4; 315 i2 -= 2; 316 for (i=m-2; i>=0; i--) { 317 v = aa + 4*(diag[i]+1); 318 vi = aj + diag[i] + 1; 319 nz = ai[i+1] - diag[i] - 1; 320 s1 = x[i2]; s2 = x[i2+1]; 321 for (j=0; j<nz; j++) { 322 idx = 2*vi[j]; 323 it = 4*j; 324 x1 = x[idx]; x2 = x[1+idx]; 325 s1 -= v[it]*x1 + v[it+2]*x2; 326 s2 -= v[it+1]*x1 + v[it+3]*x2; 327 } 328 x[i2] = idiag[0]*s1 + idiag[2]*s2; 329 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 330 idiag -= 4; 331 i2 -= 2; 332 } 333 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 334 } 335 } else { 336 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 337 } 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 346 { 347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 348 PetscScalar *x,x1,x2,x3,s1,s2,s3; 349 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 350 const PetscScalar *b; 351 PetscErrorCode ierr; 352 PetscInt m = a->mbs,i,i2,nz,idx; 353 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 354 355 PetscFunctionBegin; 356 its = its*lits; 357 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 358 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 359 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 360 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 361 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 362 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 363 364 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 365 366 diag = a->diag; 367 idiag = a->idiag; 368 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 369 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 370 371 if (flag & SOR_ZERO_INITIAL_GUESS) { 372 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 373 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 374 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 375 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 376 i2 = 3; 377 idiag += 9; 378 for (i=1; i<m; i++) { 379 v = aa + 9*ai[i]; 380 vi = aj + ai[i]; 381 nz = diag[i] - ai[i]; 382 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 383 while (nz--) { 384 idx = 3*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 386 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 387 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 388 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 389 v += 9; 390 } 391 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 392 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 393 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 394 idiag += 9; 395 i2 += 3; 396 } 397 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 398 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 399 } 400 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 401 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 402 i2 = 0; 403 mdiag = a->idiag+9*a->mbs; 404 for (i=0; i<m; i++) { 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 406 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 407 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 408 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 409 mdiag += 9; 410 i2 += 3; 411 } 412 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 413 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 414 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 415 } 416 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 417 idiag = a->idiag+9*a->mbs - 9; 418 i2 = 3*m - 3; 419 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 420 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 421 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 422 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 423 idiag -= 9; 424 i2 -= 3; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 9*(diag[i]+1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 430 while (nz--) { 431 idx = 3*(*vi++); 432 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 433 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 434 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 435 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 436 v += 9; 437 } 438 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 439 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 440 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 441 idiag -= 9; 442 i2 -= 3; 443 } 444 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 445 } 446 } else { 447 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 448 } 449 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 457 { 458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 459 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 460 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 461 const PetscScalar *b; 462 PetscErrorCode ierr; 463 PetscInt m = a->mbs,i,i2,nz,idx; 464 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 465 466 PetscFunctionBegin; 467 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 468 its = its*lits; 469 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 470 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 471 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 472 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 473 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 474 475 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 476 477 diag = a->diag; 478 idiag = a->idiag; 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 481 482 if (flag & SOR_ZERO_INITIAL_GUESS) { 483 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 484 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 485 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 486 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 487 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 488 i2 = 4; 489 idiag += 16; 490 for (i=1; i<m; i++) { 491 v = aa + 16*ai[i]; 492 vi = aj + ai[i]; 493 nz = diag[i] - ai[i]; 494 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 495 while (nz--) { 496 idx = 4*(*vi++); 497 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 498 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 499 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 500 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 501 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 502 v += 16; 503 } 504 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 505 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 506 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 507 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 508 idiag += 16; 509 i2 += 4; 510 } 511 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 512 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 513 } 514 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 515 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 516 i2 = 0; 517 mdiag = a->idiag+16*a->mbs; 518 for (i=0; i<m; i++) { 519 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 520 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 521 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 522 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 523 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 524 mdiag += 16; 525 i2 += 4; 526 } 527 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 528 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 529 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 530 } 531 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 532 idiag = a->idiag+16*a->mbs - 16; 533 i2 = 4*m - 4; 534 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 535 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 536 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 537 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 538 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 539 idiag -= 16; 540 i2 -= 4; 541 for (i=m-2; i>=0; i--) { 542 v = aa + 16*(diag[i]+1); 543 vi = aj + diag[i] + 1; 544 nz = ai[i+1] - diag[i] - 1; 545 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 546 while (nz--) { 547 idx = 4*(*vi++); 548 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 549 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 550 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 551 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 552 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 553 v += 16; 554 } 555 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 556 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 557 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 558 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 559 idiag -= 16; 560 i2 -= 4; 561 } 562 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 563 } 564 } else { 565 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 566 } 567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 575 { 576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 577 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 578 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 579 const PetscScalar *b; 580 PetscErrorCode ierr; 581 PetscInt m = a->mbs,i,i2,nz,idx; 582 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 583 584 PetscFunctionBegin; 585 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 586 its = its*lits; 587 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 588 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 589 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 590 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 591 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 592 593 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 594 595 diag = a->diag; 596 idiag = a->idiag; 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 599 600 if (flag & SOR_ZERO_INITIAL_GUESS) { 601 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 602 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 603 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 604 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 605 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 606 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 607 i2 = 5; 608 idiag += 25; 609 for (i=1; i<m; i++) { 610 v = aa + 25*ai[i]; 611 vi = aj + ai[i]; 612 nz = diag[i] - ai[i]; 613 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 614 while (nz--) { 615 idx = 5*(*vi++); 616 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 617 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 618 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 619 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 620 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 621 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 622 v += 25; 623 } 624 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 625 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 626 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 627 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 628 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 629 idiag += 25; 630 i2 += 5; 631 } 632 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 633 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 634 } 635 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 636 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 637 i2 = 0; 638 mdiag = a->idiag+25*a->mbs; 639 for (i=0; i<m; i++) { 640 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 641 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 642 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 643 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 644 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 645 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 646 mdiag += 25; 647 i2 += 5; 648 } 649 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 650 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 651 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 652 } 653 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 654 idiag = a->idiag+25*a->mbs - 25; 655 i2 = 5*m - 5; 656 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 657 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 658 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 659 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 660 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 661 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 662 idiag -= 25; 663 i2 -= 5; 664 for (i=m-2; i>=0; i--) { 665 v = aa + 25*(diag[i]+1); 666 vi = aj + diag[i] + 1; 667 nz = ai[i+1] - diag[i] - 1; 668 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 672 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 673 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 674 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 675 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 676 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 677 v += 25; 678 } 679 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 680 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 681 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 682 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 683 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 684 idiag -= 25; 685 i2 -= 5; 686 } 687 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 688 } 689 } else { 690 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 691 } 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 700 { 701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 702 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 703 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 704 const PetscScalar *b; 705 PetscErrorCode ierr; 706 PetscInt m = a->mbs,i,i2,nz,idx; 707 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 708 709 PetscFunctionBegin; 710 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 711 its = its*lits; 712 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 713 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 714 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 715 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 716 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 717 718 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 719 720 diag = a->diag; 721 idiag = a->idiag; 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 724 725 if (flag & SOR_ZERO_INITIAL_GUESS) { 726 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 727 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 728 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 729 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 730 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 731 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 732 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 733 i2 = 6; 734 idiag += 36; 735 for (i=1; i<m; i++) { 736 v = aa + 36*ai[i]; 737 vi = aj + ai[i]; 738 nz = diag[i] - ai[i]; 739 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 740 while (nz--) { 741 idx = 6*(*vi++); 742 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 743 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 744 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 745 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 746 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 747 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 748 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 749 v += 36; 750 } 751 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 752 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 753 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 754 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 755 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 756 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 757 idiag += 36; 758 i2 += 6; 759 } 760 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 761 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 762 } 763 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 764 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 765 i2 = 0; 766 mdiag = a->idiag+36*a->mbs; 767 for (i=0; i<m; i++) { 768 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 769 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 770 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 771 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 772 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 773 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 774 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 775 mdiag += 36; 776 i2 += 6; 777 } 778 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 779 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 780 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 781 } 782 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 783 idiag = a->idiag+36*a->mbs - 36; 784 i2 = 6*m - 6; 785 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 786 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 787 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 788 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 789 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 790 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 791 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 792 idiag -= 36; 793 i2 -= 6; 794 for (i=m-2; i>=0; i--) { 795 v = aa + 36*(diag[i]+1); 796 vi = aj + diag[i] + 1; 797 nz = ai[i+1] - diag[i] - 1; 798 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 799 while (nz--) { 800 idx = 6*(*vi++); 801 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 802 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 803 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 804 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 805 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 806 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 807 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 808 v += 36; 809 } 810 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 811 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 812 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 813 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 814 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 815 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 816 idiag -= 36; 817 i2 -= 6; 818 } 819 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 820 } 821 } else { 822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 823 } 824 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 825 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 834 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 835 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 836 const PetscScalar *b; 837 PetscErrorCode ierr; 838 PetscInt m = a->mbs,i,i2,nz,idx; 839 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 840 841 PetscFunctionBegin; 842 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 843 its = its*lits; 844 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 845 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 846 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 847 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 848 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 849 850 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 851 852 diag = a->diag; 853 idiag = a->idiag; 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 856 857 if (flag & SOR_ZERO_INITIAL_GUESS) { 858 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 859 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 860 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 861 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 862 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 863 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 864 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 865 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 866 i2 = 7; 867 idiag += 49; 868 for (i=1; i<m; i++) { 869 v = aa + 49*ai[i]; 870 vi = aj + ai[i]; 871 nz = diag[i] - ai[i]; 872 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 873 while (nz--) { 874 idx = 7*(*vi++); 875 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 876 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 877 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 878 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 879 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 880 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 881 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 882 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 883 v += 49; 884 } 885 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 886 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 887 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 888 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 889 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 890 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 891 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 892 idiag += 49; 893 i2 += 7; 894 } 895 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 896 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 897 } 898 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 899 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 900 i2 = 0; 901 mdiag = a->idiag+49*a->mbs; 902 for (i=0; i<m; i++) { 903 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 904 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 905 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 906 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 907 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 908 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 909 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 910 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 911 mdiag += 49; 912 i2 += 7; 913 } 914 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 915 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 916 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 917 } 918 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 919 idiag = a->idiag+49*a->mbs - 49; 920 i2 = 7*m - 7; 921 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 922 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 923 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 924 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 925 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 926 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 927 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 928 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 929 idiag -= 49; 930 i2 -= 7; 931 for (i=m-2; i>=0; i--) { 932 v = aa + 49*(diag[i]+1); 933 vi = aj + diag[i] + 1; 934 nz = ai[i+1] - diag[i] - 1; 935 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 936 while (nz--) { 937 idx = 7*(*vi++); 938 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 939 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 940 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 941 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 942 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 943 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 944 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 945 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 946 v += 49; 947 } 948 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 949 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 950 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 951 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 952 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 953 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 954 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 955 idiag -= 49; 956 i2 -= 7; 957 } 958 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 959 } 960 } else { 961 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscScalar *x,*work,*w,*workt; 974 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 975 const PetscScalar *b; 976 PetscErrorCode ierr; 977 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 978 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 979 980 PetscFunctionBegin; 981 its = its*lits; 982 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 983 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 984 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 985 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 986 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 987 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 988 989 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 990 991 diag = a->diag; 992 idiag = a->idiag; 993 if (!a->mult_work) { 994 k = PetscMax(A->rmap->n,A->cmap->n); 995 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 996 } 997 work = a->mult_work; 998 if (!a->sor_work) { 999 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1000 } 1001 w = a->sor_work; 1002 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1005 1006 if (flag & SOR_ZERO_INITIAL_GUESS) { 1007 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1008 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1009 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1010 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1011 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1012 i2 = bs; 1013 idiag += bs2; 1014 for (i=1; i<m; i++) { 1015 v = aa + bs2*ai[i]; 1016 vi = aj + ai[i]; 1017 nz = diag[i] - ai[i]; 1018 1019 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1020 /* copy all rows of x that are needed into contiguous space */ 1021 workt = work; 1022 for (j=0; j<nz; j++) { 1023 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1024 workt += bs; 1025 } 1026 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1027 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1028 while (nz--) { 1029 idx = N*(*vi++); 1030 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1031 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1032 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1033 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1034 v += N2; 1035 } */ 1036 1037 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1038 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1039 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1040 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1041 1042 idiag += bs2; 1043 i2 += bs; 1044 } 1045 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1046 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1047 } 1048 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1049 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1050 i2 = 0; 1051 mdiag = a->idiag+bs2*a->mbs; 1052 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 for (i=0; i<m; i++) { 1054 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1055 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1056 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1057 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1058 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1059 1060 mdiag += bs2; 1061 i2 += bs; 1062 } 1063 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1064 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1065 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1066 } 1067 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 idiag = a->idiag+bs2*a->mbs - bs2; 1069 i2 = bs*m - bs; 1070 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1071 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1072 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1073 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1074 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1075 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1076 idiag -= bs2; 1077 i2 -= bs; 1078 for (i=m-2; i>=0; i--) { 1079 v = aa + bs2*(diag[i]+1); 1080 vi = aj + diag[i] + 1; 1081 nz = ai[i+1] - diag[i] - 1; 1082 1083 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1084 /* copy all rows of x that are needed into contiguous space */ 1085 workt = work; 1086 for (j=0; j<nz; j++) { 1087 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1088 workt += bs; 1089 } 1090 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1091 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1092 while (nz--) { 1093 idx = N*(*vi++); 1094 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1095 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1096 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1097 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1098 v += N2; 1099 } */ 1100 1101 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1102 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1103 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1104 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1105 idiag -= bs2; 1106 i2 -= bs; 1107 } 1108 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1109 } 1110 } else { 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1112 } 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* 1119 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1120 */ 1121 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1124 #define matsetvaluesblocked4_ matsetvaluesblocked4 1125 #endif 1126 1127 EXTERN_C_BEGIN 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "matsetvaluesblocked4_" 1130 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1131 { 1132 Mat A = *AA; 1133 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1134 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1135 PetscInt *ai=a->i,*ailen=a->ilen; 1136 PetscInt *aj=a->j,stepval,lastcol = -1; 1137 const PetscScalar *value = v; 1138 MatScalar *ap,*aa = a->a,*bap; 1139 1140 PetscFunctionBegin; 1141 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1142 stepval = (n-1)*4; 1143 for (k=0; k<m; k++) { /* loop over added rows */ 1144 row = im[k]; 1145 rp = aj + ai[row]; 1146 ap = aa + 16*ai[row]; 1147 nrow = ailen[row]; 1148 low = 0; 1149 high = nrow; 1150 for (l=0; l<n; l++) { /* loop over added columns */ 1151 col = in[l]; 1152 if (col <= lastcol) low = 0; else high = nrow; 1153 lastcol = col; 1154 value = v + k*(stepval+4 + l)*4; 1155 while (high-low > 7) { 1156 t = (low+high)/2; 1157 if (rp[t] > col) high = t; 1158 else low = t; 1159 } 1160 for (i=low; i<high; i++) { 1161 if (rp[i] > col) break; 1162 if (rp[i] == col) { 1163 bap = ap + 16*i; 1164 for (ii=0; ii<4; ii++,value+=stepval) { 1165 for (jj=ii; jj<16; jj+=4) { 1166 bap[jj] += *value++; 1167 } 1168 } 1169 goto noinsert2; 1170 } 1171 } 1172 N = nrow++ - 1; 1173 high++; /* added new column index thus must search to one higher than before */ 1174 /* shift up all the later entries in this row */ 1175 for (ii=N; ii>=i; ii--) { 1176 rp[ii+1] = rp[ii]; 1177 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1178 } 1179 if (N >= i) { 1180 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1181 } 1182 rp[i] = col; 1183 bap = ap + 16*i; 1184 for (ii=0; ii<4; ii++,value+=stepval) { 1185 for (jj=ii; jj<16; jj+=4) { 1186 bap[jj] = *value++; 1187 } 1188 } 1189 noinsert2:; 1190 low = i; 1191 } 1192 ailen[row] = nrow; 1193 } 1194 PetscFunctionReturnVoid(); 1195 } 1196 EXTERN_C_END 1197 1198 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1199 #define matsetvalues4_ MATSETVALUES4 1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1201 #define matsetvalues4_ matsetvalues4 1202 #endif 1203 1204 EXTERN_C_BEGIN 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSetValues4_" 1207 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1208 { 1209 Mat A = *AA; 1210 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1211 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1212 PetscInt *ai=a->i,*ailen=a->ilen; 1213 PetscInt *aj=a->j,brow,bcol; 1214 PetscInt ridx,cidx,lastcol = -1; 1215 MatScalar *ap,value,*aa=a->a,*bap; 1216 1217 PetscFunctionBegin; 1218 for (k=0; k<m; k++) { /* loop over added rows */ 1219 row = im[k]; brow = row/4; 1220 rp = aj + ai[brow]; 1221 ap = aa + 16*ai[brow]; 1222 nrow = ailen[brow]; 1223 low = 0; 1224 high = nrow; 1225 for (l=0; l<n; l++) { /* loop over added columns */ 1226 col = in[l]; bcol = col/4; 1227 ridx = row % 4; cidx = col % 4; 1228 value = v[l + k*n]; 1229 if (col <= lastcol) low = 0; else high = nrow; 1230 lastcol = col; 1231 while (high-low > 7) { 1232 t = (low+high)/2; 1233 if (rp[t] > bcol) high = t; 1234 else low = t; 1235 } 1236 for (i=low; i<high; i++) { 1237 if (rp[i] > bcol) break; 1238 if (rp[i] == bcol) { 1239 bap = ap + 16*i + 4*cidx + ridx; 1240 *bap += value; 1241 goto noinsert1; 1242 } 1243 } 1244 N = nrow++ - 1; 1245 high++; /* added new column thus must search to one higher than before */ 1246 /* shift up all the later entries in this row */ 1247 for (ii=N; ii>=i; ii--) { 1248 rp[ii+1] = rp[ii]; 1249 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1250 } 1251 if (N>=i) { 1252 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1253 } 1254 rp[i] = bcol; 1255 ap[16*i + 4*cidx + ridx] = value; 1256 noinsert1:; 1257 low = i; 1258 } 1259 ailen[brow] = nrow; 1260 } 1261 PetscFunctionReturnVoid(); 1262 } 1263 EXTERN_C_END 1264 1265 /* 1266 Checks for missing diagonals 1267 */ 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 PetscInt *diag,*jj = a->j,i; 1275 1276 PetscFunctionBegin; 1277 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1278 *missing = PETSC_FALSE; 1279 if (A->rmap->n > 0 && !jj) { 1280 *missing = PETSC_TRUE; 1281 if (d) *d = 0; 1282 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1283 } else { 1284 diag = a->diag; 1285 for (i=0; i<a->mbs; i++) { 1286 if (jj[diag[i]] != i) { 1287 *missing = PETSC_TRUE; 1288 if (d) *d = i; 1289 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1290 } 1291 } 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1301 PetscErrorCode ierr; 1302 PetscInt i,j,m = a->mbs; 1303 1304 PetscFunctionBegin; 1305 if (!a->diag) { 1306 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1307 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1308 a->free_diag = PETSC_TRUE; 1309 } 1310 for (i=0; i<m; i++) { 1311 a->diag[i] = a->i[i+1]; 1312 for (j=a->i[i]; j<a->i[i+1]; j++) { 1313 if (a->j[j] == i) { 1314 a->diag[i] = j; 1315 break; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1328 { 1329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1332 PetscInt *tia, *tja; 1333 1334 PetscFunctionBegin; 1335 *nn = n; 1336 if (!ia) PetscFunctionReturn(0); 1337 if (symmetric) { 1338 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1339 nz = tia[n]; 1340 } else { 1341 tia = a->i; tja = a->j; 1342 } 1343 1344 if (!blockcompressed && bs > 1) { 1345 (*nn) *= bs; 1346 /* malloc & create the natural set of indices */ 1347 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1348 if (n) { 1349 (*ia)[0] = 0; 1350 for (j=1; j<bs; j++) { 1351 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1352 } 1353 } 1354 1355 for (i=1; i<n; i++) { 1356 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1357 for (j=1; j<bs; j++) { 1358 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1359 } 1360 } 1361 if (n) { 1362 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1363 } 1364 1365 if (ja) { 1366 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1367 cnt = 0; 1368 for (i=0; i<n; i++) { 1369 for (j=0; j<bs; j++) { 1370 for (k=tia[i]; k<tia[i+1]; k++) { 1371 for (l=0; l<bs; l++) { 1372 (*ja)[cnt++] = bs*tja[k] + l; 1373 } 1374 } 1375 } 1376 } 1377 } 1378 1379 n *= bs; 1380 nz *= bs*bs; 1381 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1382 ierr = PetscFree(tia);CHKERRQ(ierr); 1383 ierr = PetscFree(tja);CHKERRQ(ierr); 1384 } 1385 } else if (oshift == 1) { 1386 if (symmetric) { 1387 PetscInt nz = tia[A->rmap->n/bs]; 1388 /* add 1 to i and j indices */ 1389 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1390 *ia = tia; 1391 if (ja) { 1392 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1393 *ja = tja; 1394 } 1395 } else { 1396 PetscInt nz = a->i[A->rmap->n/bs]; 1397 /* malloc space and add 1 to i and j indices */ 1398 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1399 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1400 if (ja) { 1401 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1402 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1403 } 1404 } 1405 } else { 1406 *ia = tia; 1407 if (ja) *ja = tja; 1408 } 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1416 { 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (!ia) PetscFunctionReturn(0); 1421 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1422 ierr = PetscFree(*ia);CHKERRQ(ierr); 1423 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 #if defined(PETSC_USE_LOG) 1437 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1438 #endif 1439 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1440 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1441 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1442 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1443 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1444 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1445 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1446 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1447 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1448 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1449 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1450 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1451 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1452 1453 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1454 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1455 ierr = PetscFree(a);CHKERRQ(ierr); 1456 1457 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1458 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1459 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1461 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1462 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1471 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1472 { 1473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1474 PetscErrorCode ierr; 1475 1476 PetscFunctionBegin; 1477 switch (op) { 1478 case MAT_ROW_ORIENTED: 1479 a->roworiented = flg; 1480 break; 1481 case MAT_KEEP_NONZERO_PATTERN: 1482 a->keepnonzeropattern = flg; 1483 break; 1484 case MAT_NEW_NONZERO_LOCATIONS: 1485 a->nonew = (flg ? 0 : 1); 1486 break; 1487 case MAT_NEW_NONZERO_LOCATION_ERR: 1488 a->nonew = (flg ? -1 : 0); 1489 break; 1490 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1491 a->nonew = (flg ? -2 : 0); 1492 break; 1493 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1494 a->nounused = (flg ? -1 : 0); 1495 break; 1496 case MAT_CHECK_COMPRESSED_ROW: 1497 a->compressedrow.check = flg; 1498 break; 1499 case MAT_NEW_DIAGONALS: 1500 case MAT_IGNORE_OFF_PROC_ENTRIES: 1501 case MAT_USE_HASH_TABLE: 1502 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1503 break; 1504 case MAT_SYMMETRIC: 1505 case MAT_STRUCTURALLY_SYMMETRIC: 1506 case MAT_HERMITIAN: 1507 case MAT_SYMMETRY_ETERNAL: 1508 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1509 break; 1510 default: 1511 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1518 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1519 { 1520 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1521 PetscErrorCode ierr; 1522 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1523 MatScalar *aa,*aa_i; 1524 PetscScalar *v_i; 1525 1526 PetscFunctionBegin; 1527 bs = A->rmap->bs; 1528 ai = a->i; 1529 aj = a->j; 1530 aa = a->a; 1531 bs2 = a->bs2; 1532 1533 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1534 1535 bn = row/bs; /* Block number */ 1536 bp = row % bs; /* Block Position */ 1537 M = ai[bn+1] - ai[bn]; 1538 *nz = bs*M; 1539 1540 if (v) { 1541 *v = 0; 1542 if (*nz) { 1543 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1544 for (i=0; i<M; i++) { /* for each block in the block row */ 1545 v_i = *v + i*bs; 1546 aa_i = aa + bs2*(ai[bn] + i); 1547 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1548 } 1549 } 1550 } 1551 1552 if (idx) { 1553 *idx = 0; 1554 if (*nz) { 1555 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1556 for (i=0; i<M; i++) { /* for each block in the block row */ 1557 idx_i = *idx + i*bs; 1558 itmp = bs*aj[ai[bn] + i]; 1559 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1560 } 1561 } 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1568 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1569 { 1570 PetscErrorCode ierr; 1571 1572 PetscFunctionBegin; 1573 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1574 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1575 PetscFunctionReturn(0); 1576 } 1577 1578 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1582 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1583 { 1584 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1585 Mat C; 1586 PetscErrorCode ierr; 1587 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1588 PetscInt *rows,*cols,bs2=a->bs2; 1589 MatScalar *array; 1590 1591 PetscFunctionBegin; 1592 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1593 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1594 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1595 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1596 1597 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1598 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1599 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1600 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1601 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1602 ierr = PetscFree(col);CHKERRQ(ierr); 1603 } else { 1604 C = *B; 1605 } 1606 1607 array = a->a; 1608 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1609 for (i=0; i<mbs; i++) { 1610 cols[0] = i*bs; 1611 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1612 len = ai[i+1] - ai[i]; 1613 for (j=0; j<len; j++) { 1614 rows[0] = (*aj++)*bs; 1615 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1616 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1617 array += bs2; 1618 } 1619 } 1620 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1621 1622 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1623 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1624 1625 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1626 *B = C; 1627 } else { 1628 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1635 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1636 { 1637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1638 PetscErrorCode ierr; 1639 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1640 int fd; 1641 PetscScalar *aa; 1642 FILE *file; 1643 1644 PetscFunctionBegin; 1645 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1646 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1647 col_lens[0] = MAT_FILE_CLASSID; 1648 1649 col_lens[1] = A->rmap->N; 1650 col_lens[2] = A->cmap->n; 1651 col_lens[3] = a->nz*bs2; 1652 1653 /* store lengths of each row and write (including header) to file */ 1654 count = 0; 1655 for (i=0; i<a->mbs; i++) { 1656 for (j=0; j<bs; j++) { 1657 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1658 } 1659 } 1660 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1661 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1662 1663 /* store column indices (zero start index) */ 1664 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1665 count = 0; 1666 for (i=0; i<a->mbs; i++) { 1667 for (j=0; j<bs; j++) { 1668 for (k=a->i[i]; k<a->i[i+1]; k++) { 1669 for (l=0; l<bs; l++) { 1670 jj[count++] = bs*a->j[k] + l; 1671 } 1672 } 1673 } 1674 } 1675 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1676 ierr = PetscFree(jj);CHKERRQ(ierr); 1677 1678 /* store nonzero values */ 1679 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1680 count = 0; 1681 for (i=0; i<a->mbs; i++) { 1682 for (j=0; j<bs; j++) { 1683 for (k=a->i[i]; k<a->i[i+1]; k++) { 1684 for (l=0; l<bs; l++) { 1685 aa[count++] = a->a[bs2*k + l*bs + j]; 1686 } 1687 } 1688 } 1689 } 1690 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1691 ierr = PetscFree(aa);CHKERRQ(ierr); 1692 1693 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1694 if (file) { 1695 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1702 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1703 { 1704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1705 PetscErrorCode ierr; 1706 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1707 PetscViewerFormat format; 1708 1709 PetscFunctionBegin; 1710 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1711 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1712 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1713 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1714 Mat aij; 1715 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1716 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1717 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1718 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1719 PetscFunctionReturn(0); 1720 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1721 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1722 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1723 for (i=0; i<a->mbs; i++) { 1724 for (j=0; j<bs; j++) { 1725 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1726 for (k=a->i[i]; k<a->i[i+1]; k++) { 1727 for (l=0; l<bs; l++) { 1728 #if defined(PETSC_USE_COMPLEX) 1729 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1730 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1731 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1732 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1733 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1734 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1735 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1736 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1737 } 1738 #else 1739 if (a->a[bs2*k + l*bs + j] != 0.0) { 1740 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1741 } 1742 #endif 1743 } 1744 } 1745 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1746 } 1747 } 1748 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1749 } else { 1750 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1751 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1752 for (i=0; i<a->mbs; i++) { 1753 for (j=0; j<bs; j++) { 1754 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1755 for (k=a->i[i]; k<a->i[i+1]; k++) { 1756 for (l=0; l<bs; l++) { 1757 #if defined(PETSC_USE_COMPLEX) 1758 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1760 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1761 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1762 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1763 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1764 } else { 1765 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1766 } 1767 #else 1768 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1769 #endif 1770 } 1771 } 1772 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1773 } 1774 } 1775 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1776 } 1777 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1783 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1784 { 1785 Mat A = (Mat) Aa; 1786 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1787 PetscErrorCode ierr; 1788 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1789 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1790 MatScalar *aa; 1791 PetscViewer viewer; 1792 1793 PetscFunctionBegin; 1794 1795 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1796 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1797 1798 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1799 1800 /* loop over matrix elements drawing boxes */ 1801 color = PETSC_DRAW_BLUE; 1802 for (i=0,row=0; i<mbs; i++,row+=bs) { 1803 for (j=a->i[i]; j<a->i[i+1]; j++) { 1804 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1805 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1806 aa = a->a + j*bs2; 1807 for (k=0; k<bs; k++) { 1808 for (l=0; l<bs; l++) { 1809 if (PetscRealPart(*aa++) >= 0.) continue; 1810 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1811 } 1812 } 1813 } 1814 } 1815 color = PETSC_DRAW_CYAN; 1816 for (i=0,row=0; i<mbs; i++,row+=bs) { 1817 for (j=a->i[i]; j<a->i[i+1]; j++) { 1818 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1819 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1820 aa = a->a + j*bs2; 1821 for (k=0; k<bs; k++) { 1822 for (l=0; l<bs; l++) { 1823 if (PetscRealPart(*aa++) != 0.) continue; 1824 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1825 } 1826 } 1827 } 1828 } 1829 1830 color = PETSC_DRAW_RED; 1831 for (i=0,row=0; i<mbs; i++,row+=bs) { 1832 for (j=a->i[i]; j<a->i[i+1]; j++) { 1833 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1834 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1835 aa = a->a + j*bs2; 1836 for (k=0; k<bs; k++) { 1837 for (l=0; l<bs; l++) { 1838 if (PetscRealPart(*aa++) <= 0.) continue; 1839 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1840 } 1841 } 1842 } 1843 } 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1849 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1850 { 1851 PetscErrorCode ierr; 1852 PetscReal xl,yl,xr,yr,w,h; 1853 PetscDraw draw; 1854 PetscBool isnull; 1855 1856 PetscFunctionBegin; 1857 1858 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1859 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1860 1861 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1862 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1863 xr += w; yr += h; xl = -w; yl = -h; 1864 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1865 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1866 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatView_SeqBAIJ" 1872 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1873 { 1874 PetscErrorCode ierr; 1875 PetscBool iascii,isbinary,isdraw; 1876 1877 PetscFunctionBegin; 1878 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1879 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1880 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1881 if (iascii){ 1882 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1883 } else if (isbinary) { 1884 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1885 } else if (isdraw) { 1886 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1887 } else { 1888 Mat B; 1889 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1890 ierr = MatView(B,viewer);CHKERRQ(ierr); 1891 ierr = MatDestroy(&B);CHKERRQ(ierr); 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1899 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1900 { 1901 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1902 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1903 PetscInt *ai = a->i,*ailen = a->ilen; 1904 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1905 MatScalar *ap,*aa = a->a; 1906 1907 PetscFunctionBegin; 1908 for (k=0; k<m; k++) { /* loop over rows */ 1909 row = im[k]; brow = row/bs; 1910 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1911 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1912 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1913 nrow = ailen[brow]; 1914 for (l=0; l<n; l++) { /* loop over columns */ 1915 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1916 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1917 col = in[l] ; 1918 bcol = col/bs; 1919 cidx = col%bs; 1920 ridx = row%bs; 1921 high = nrow; 1922 low = 0; /* assume unsorted */ 1923 while (high-low > 5) { 1924 t = (low+high)/2; 1925 if (rp[t] > bcol) high = t; 1926 else low = t; 1927 } 1928 for (i=low; i<high; i++) { 1929 if (rp[i] > bcol) break; 1930 if (rp[i] == bcol) { 1931 *v++ = ap[bs2*i+bs*cidx+ridx]; 1932 goto finished; 1933 } 1934 } 1935 *v++ = 0.0; 1936 finished:; 1937 } 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1944 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1945 { 1946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1947 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1948 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1949 PetscErrorCode ierr; 1950 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1951 PetscBool roworiented=a->roworiented; 1952 const PetscScalar *value = v; 1953 MatScalar *ap,*aa = a->a,*bap; 1954 1955 PetscFunctionBegin; 1956 if (roworiented) { 1957 stepval = (n-1)*bs; 1958 } else { 1959 stepval = (m-1)*bs; 1960 } 1961 for (k=0; k<m; k++) { /* loop over added rows */ 1962 row = im[k]; 1963 if (row < 0) continue; 1964 #if defined(PETSC_USE_DEBUG) 1965 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1966 #endif 1967 rp = aj + ai[row]; 1968 ap = aa + bs2*ai[row]; 1969 rmax = imax[row]; 1970 nrow = ailen[row]; 1971 low = 0; 1972 high = nrow; 1973 for (l=0; l<n; l++) { /* loop over added columns */ 1974 if (in[l] < 0) continue; 1975 #if defined(PETSC_USE_DEBUG) 1976 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1977 #endif 1978 col = in[l]; 1979 if (roworiented) { 1980 value = v + (k*(stepval+bs) + l)*bs; 1981 } else { 1982 value = v + (l*(stepval+bs) + k)*bs; 1983 } 1984 if (col <= lastcol) low = 0; else high = nrow; 1985 lastcol = col; 1986 while (high-low > 7) { 1987 t = (low+high)/2; 1988 if (rp[t] > col) high = t; 1989 else low = t; 1990 } 1991 for (i=low; i<high; i++) { 1992 if (rp[i] > col) break; 1993 if (rp[i] == col) { 1994 bap = ap + bs2*i; 1995 if (roworiented) { 1996 if (is == ADD_VALUES) { 1997 for (ii=0; ii<bs; ii++,value+=stepval) { 1998 for (jj=ii; jj<bs2; jj+=bs) { 1999 bap[jj] += *value++; 2000 } 2001 } 2002 } else { 2003 for (ii=0; ii<bs; ii++,value+=stepval) { 2004 for (jj=ii; jj<bs2; jj+=bs) { 2005 bap[jj] = *value++; 2006 } 2007 } 2008 } 2009 } else { 2010 if (is == ADD_VALUES) { 2011 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2012 for (jj=0; jj<bs; jj++) { 2013 bap[jj] += value[jj]; 2014 } 2015 bap += bs; 2016 } 2017 } else { 2018 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2019 for (jj=0; jj<bs; jj++) { 2020 bap[jj] = value[jj]; 2021 } 2022 bap += bs; 2023 } 2024 } 2025 } 2026 goto noinsert2; 2027 } 2028 } 2029 if (nonew == 1) goto noinsert2; 2030 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2031 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2032 N = nrow++ - 1; high++; 2033 /* shift up all the later entries in this row */ 2034 for (ii=N; ii>=i; ii--) { 2035 rp[ii+1] = rp[ii]; 2036 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2037 } 2038 if (N >= i) { 2039 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2040 } 2041 rp[i] = col; 2042 bap = ap + bs2*i; 2043 if (roworiented) { 2044 for (ii=0; ii<bs; ii++,value+=stepval) { 2045 for (jj=ii; jj<bs2; jj+=bs) { 2046 bap[jj] = *value++; 2047 } 2048 } 2049 } else { 2050 for (ii=0; ii<bs; ii++,value+=stepval) { 2051 for (jj=0; jj<bs; jj++) { 2052 *bap++ = *value++; 2053 } 2054 } 2055 } 2056 noinsert2:; 2057 low = i; 2058 } 2059 ailen[row] = nrow; 2060 } 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2066 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2067 { 2068 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2069 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2070 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2071 PetscErrorCode ierr; 2072 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2073 MatScalar *aa = a->a,*ap; 2074 PetscReal ratio=0.6; 2075 2076 PetscFunctionBegin; 2077 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2078 2079 if (m) rmax = ailen[0]; 2080 for (i=1; i<mbs; i++) { 2081 /* move each row back by the amount of empty slots (fshift) before it*/ 2082 fshift += imax[i-1] - ailen[i-1]; 2083 rmax = PetscMax(rmax,ailen[i]); 2084 if (fshift) { 2085 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2086 N = ailen[i]; 2087 for (j=0; j<N; j++) { 2088 ip[j-fshift] = ip[j]; 2089 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2090 } 2091 } 2092 ai[i] = ai[i-1] + ailen[i-1]; 2093 } 2094 if (mbs) { 2095 fshift += imax[mbs-1] - ailen[mbs-1]; 2096 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2097 } 2098 /* reset ilen and imax for each row */ 2099 for (i=0; i<mbs; i++) { 2100 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2101 } 2102 a->nz = ai[mbs]; 2103 2104 /* diagonals may have moved, so kill the diagonal pointers */ 2105 a->idiagvalid = PETSC_FALSE; 2106 if (fshift && a->diag) { 2107 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2108 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2109 a->diag = 0; 2110 } 2111 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2112 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2113 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2114 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2115 A->info.mallocs += a->reallocs; 2116 a->reallocs = 0; 2117 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2118 2119 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2120 A->same_nonzero = PETSC_TRUE; 2121 PetscFunctionReturn(0); 2122 } 2123 2124 /* 2125 This function returns an array of flags which indicate the locations of contiguous 2126 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2127 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2128 Assume: sizes should be long enough to hold all the values. 2129 */ 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2132 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2133 { 2134 PetscInt i,j,k,row; 2135 PetscBool flg; 2136 2137 PetscFunctionBegin; 2138 for (i=0,j=0; i<n; j++) { 2139 row = idx[i]; 2140 if (row%bs!=0) { /* Not the begining of a block */ 2141 sizes[j] = 1; 2142 i++; 2143 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2144 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2145 i++; 2146 } else { /* Begining of the block, so check if the complete block exists */ 2147 flg = PETSC_TRUE; 2148 for (k=1; k<bs; k++) { 2149 if (row+k != idx[i+k]) { /* break in the block */ 2150 flg = PETSC_FALSE; 2151 break; 2152 } 2153 } 2154 if (flg) { /* No break in the bs */ 2155 sizes[j] = bs; 2156 i+= bs; 2157 } else { 2158 sizes[j] = 1; 2159 i++; 2160 } 2161 } 2162 } 2163 *bs_max = j; 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2169 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2170 { 2171 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2172 PetscErrorCode ierr; 2173 PetscInt i,j,k,count,*rows; 2174 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2175 PetscScalar zero = 0.0; 2176 MatScalar *aa; 2177 const PetscScalar *xx; 2178 PetscScalar *bb; 2179 2180 PetscFunctionBegin; 2181 /* fix right hand side if needed */ 2182 if (x && b) { 2183 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2184 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2185 for (i=0; i<is_n; i++) { 2186 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2187 } 2188 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2189 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2190 } 2191 2192 /* Make a copy of the IS and sort it */ 2193 /* allocate memory for rows,sizes */ 2194 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2195 2196 /* copy IS values to rows, and sort them */ 2197 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2198 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2199 2200 if (baij->keepnonzeropattern) { 2201 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2202 bs_max = is_n; 2203 A->same_nonzero = PETSC_TRUE; 2204 } else { 2205 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2206 A->same_nonzero = PETSC_FALSE; 2207 } 2208 2209 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2210 row = rows[j]; 2211 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2212 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2213 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2214 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2215 if (diag != 0.0) { 2216 if (baij->ilen[row/bs] > 0) { 2217 baij->ilen[row/bs] = 1; 2218 baij->j[baij->i[row/bs]] = row/bs; 2219 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2220 } 2221 /* Now insert all the diagonal values for this bs */ 2222 for (k=0; k<bs; k++) { 2223 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2224 } 2225 } else { /* (diag == 0.0) */ 2226 baij->ilen[row/bs] = 0; 2227 } /* end (diag == 0.0) */ 2228 } else { /* (sizes[i] != bs) */ 2229 #if defined (PETSC_USE_DEBUG) 2230 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2231 #endif 2232 for (k=0; k<count; k++) { 2233 aa[0] = zero; 2234 aa += bs; 2235 } 2236 if (diag != 0.0) { 2237 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2238 } 2239 } 2240 } 2241 2242 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2243 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2244 PetscFunctionReturn(0); 2245 } 2246 2247 #undef __FUNCT__ 2248 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2249 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2250 { 2251 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2252 PetscErrorCode ierr; 2253 PetscInt i,j,k,count; 2254 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2255 PetscScalar zero = 0.0; 2256 MatScalar *aa; 2257 const PetscScalar *xx; 2258 PetscScalar *bb; 2259 PetscBool *zeroed,vecs = PETSC_FALSE; 2260 2261 PetscFunctionBegin; 2262 /* fix right hand side if needed */ 2263 if (x && b) { 2264 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2265 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2266 vecs = PETSC_TRUE; 2267 } 2268 A->same_nonzero = PETSC_TRUE; 2269 2270 /* zero the columns */ 2271 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2272 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2273 for (i=0; i<is_n; i++) { 2274 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2275 zeroed[is_idx[i]] = PETSC_TRUE; 2276 } 2277 for (i=0; i<A->rmap->N; i++) { 2278 if (!zeroed[i]) { 2279 row = i/bs; 2280 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2281 for (k=0; k<bs; k++) { 2282 col = bs*baij->j[j] + k; 2283 if (zeroed[col]) { 2284 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2285 if (vecs) bb[i] -= aa[0]*xx[col]; 2286 aa[0] = 0.0; 2287 } 2288 } 2289 } 2290 } else if (vecs) bb[i] = diag*xx[i]; 2291 } 2292 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2293 if (vecs) { 2294 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2295 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2296 } 2297 2298 /* zero the rows */ 2299 for (i=0; i<is_n; i++) { 2300 row = is_idx[i]; 2301 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2302 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2303 for (k=0; k<count; k++) { 2304 aa[0] = zero; 2305 aa += bs; 2306 } 2307 if (diag != 0.0) { 2308 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2309 } 2310 } 2311 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 PetscFunctionReturn(0); 2313 } 2314 2315 #undef __FUNCT__ 2316 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2317 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2318 { 2319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2320 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2321 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2322 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2323 PetscErrorCode ierr; 2324 PetscInt ridx,cidx,bs2=a->bs2; 2325 PetscBool roworiented=a->roworiented; 2326 MatScalar *ap,value,*aa=a->a,*bap; 2327 2328 PetscFunctionBegin; 2329 if (v) PetscValidScalarPointer(v,6); 2330 for (k=0; k<m; k++) { /* loop over added rows */ 2331 row = im[k]; 2332 brow = row/bs; 2333 if (row < 0) continue; 2334 #if defined(PETSC_USE_DEBUG) 2335 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2336 #endif 2337 rp = aj + ai[brow]; 2338 ap = aa + bs2*ai[brow]; 2339 rmax = imax[brow]; 2340 nrow = ailen[brow]; 2341 low = 0; 2342 high = nrow; 2343 for (l=0; l<n; l++) { /* loop over added columns */ 2344 if (in[l] < 0) continue; 2345 #if defined(PETSC_USE_DEBUG) 2346 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2347 #endif 2348 col = in[l]; bcol = col/bs; 2349 ridx = row % bs; cidx = col % bs; 2350 if (roworiented) { 2351 value = v[l + k*n]; 2352 } else { 2353 value = v[k + l*m]; 2354 } 2355 if (col <= lastcol) low = 0; else high = nrow; 2356 lastcol = col; 2357 while (high-low > 7) { 2358 t = (low+high)/2; 2359 if (rp[t] > bcol) high = t; 2360 else low = t; 2361 } 2362 for (i=low; i<high; i++) { 2363 if (rp[i] > bcol) break; 2364 if (rp[i] == bcol) { 2365 bap = ap + bs2*i + bs*cidx + ridx; 2366 if (is == ADD_VALUES) *bap += value; 2367 else *bap = value; 2368 goto noinsert1; 2369 } 2370 } 2371 if (nonew == 1) goto noinsert1; 2372 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2373 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2374 N = nrow++ - 1; high++; 2375 /* shift up all the later entries in this row */ 2376 for (ii=N; ii>=i; ii--) { 2377 rp[ii+1] = rp[ii]; 2378 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2379 } 2380 if (N>=i) { 2381 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2382 } 2383 rp[i] = bcol; 2384 ap[bs2*i + bs*cidx + ridx] = value; 2385 a->nz++; 2386 noinsert1:; 2387 low = i; 2388 } 2389 ailen[brow] = nrow; 2390 } 2391 A->same_nonzero = PETSC_FALSE; 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2397 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2398 { 2399 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2400 Mat outA; 2401 PetscErrorCode ierr; 2402 PetscBool row_identity,col_identity; 2403 2404 PetscFunctionBegin; 2405 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2406 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2407 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2408 if (!row_identity || !col_identity) { 2409 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2410 } 2411 2412 outA = inA; 2413 inA->factortype = MAT_FACTOR_LU; 2414 2415 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2416 2417 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2418 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2419 a->row = row; 2420 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2421 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2422 a->col = col; 2423 2424 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2425 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2426 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2427 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2428 2429 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2430 if (!a->solve_work) { 2431 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2432 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2433 } 2434 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2435 2436 PetscFunctionReturn(0); 2437 } 2438 2439 EXTERN_C_BEGIN 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2442 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2443 { 2444 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2445 PetscInt i,nz,mbs; 2446 2447 PetscFunctionBegin; 2448 nz = baij->maxnz; 2449 mbs = baij->mbs; 2450 for (i=0; i<nz; i++) { 2451 baij->j[i] = indices[i]; 2452 } 2453 baij->nz = nz; 2454 for (i=0; i<mbs; i++) { 2455 baij->ilen[i] = baij->imax[i]; 2456 } 2457 PetscFunctionReturn(0); 2458 } 2459 EXTERN_C_END 2460 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2463 /*@ 2464 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2465 in the matrix. 2466 2467 Input Parameters: 2468 + mat - the SeqBAIJ matrix 2469 - indices - the column indices 2470 2471 Level: advanced 2472 2473 Notes: 2474 This can be called if you have precomputed the nonzero structure of the 2475 matrix and want to provide it to the matrix object to improve the performance 2476 of the MatSetValues() operation. 2477 2478 You MUST have set the correct numbers of nonzeros per row in the call to 2479 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2480 2481 MUST be called before any calls to MatSetValues(); 2482 2483 @*/ 2484 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2485 { 2486 PetscErrorCode ierr; 2487 2488 PetscFunctionBegin; 2489 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2490 PetscValidPointer(indices,2); 2491 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2497 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2498 { 2499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2500 PetscErrorCode ierr; 2501 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2502 PetscReal atmp; 2503 PetscScalar *x,zero = 0.0; 2504 MatScalar *aa; 2505 PetscInt ncols,brow,krow,kcol; 2506 2507 PetscFunctionBegin; 2508 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2509 bs = A->rmap->bs; 2510 aa = a->a; 2511 ai = a->i; 2512 aj = a->j; 2513 mbs = a->mbs; 2514 2515 ierr = VecSet(v,zero);CHKERRQ(ierr); 2516 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2517 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2518 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2519 for (i=0; i<mbs; i++) { 2520 ncols = ai[1] - ai[0]; ai++; 2521 brow = bs*i; 2522 for (j=0; j<ncols; j++){ 2523 for (kcol=0; kcol<bs; kcol++){ 2524 for (krow=0; krow<bs; krow++){ 2525 atmp = PetscAbsScalar(*aa);aa++; 2526 row = brow + krow; /* row index */ 2527 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2528 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2529 } 2530 } 2531 aj++; 2532 } 2533 } 2534 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 #undef __FUNCT__ 2539 #define __FUNCT__ "MatCopy_SeqBAIJ" 2540 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2541 { 2542 PetscErrorCode ierr; 2543 2544 PetscFunctionBegin; 2545 /* If the two matrices have the same copy implementation, use fast copy. */ 2546 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2548 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2549 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2550 2551 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2552 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2553 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2554 } else { 2555 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2562 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2563 { 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2573 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2574 { 2575 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2576 PetscFunctionBegin; 2577 *array = a->a; 2578 PetscFunctionReturn(0); 2579 } 2580 2581 #undef __FUNCT__ 2582 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2583 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2584 { 2585 PetscFunctionBegin; 2586 PetscFunctionReturn(0); 2587 } 2588 2589 #undef __FUNCT__ 2590 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2591 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2592 { 2593 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2594 PetscErrorCode ierr; 2595 PetscInt i,bs=Y->rmap->bs,j,bs2; 2596 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2597 2598 PetscFunctionBegin; 2599 if (str == SAME_NONZERO_PATTERN) { 2600 PetscScalar alpha = a; 2601 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2602 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2603 if (y->xtoy && y->XtoY != X) { 2604 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2605 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2606 } 2607 if (!y->xtoy) { /* get xtoy */ 2608 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2609 y->XtoY = X; 2610 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2611 } 2612 bs2 = bs*bs; 2613 for (i=0; i<x->nz; i++) { 2614 j = 0; 2615 while (j < bs2){ 2616 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2617 j++; 2618 } 2619 } 2620 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2621 } else { 2622 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2623 } 2624 PetscFunctionReturn(0); 2625 } 2626 2627 #undef __FUNCT__ 2628 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2629 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2630 { 2631 PetscInt rbs,cbs; 2632 PetscErrorCode ierr; 2633 2634 PetscFunctionBegin; 2635 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2636 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2637 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2638 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2639 PetscFunctionReturn(0); 2640 } 2641 2642 #undef __FUNCT__ 2643 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2644 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2645 { 2646 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2647 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2648 MatScalar *aa = a->a; 2649 2650 PetscFunctionBegin; 2651 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2652 PetscFunctionReturn(0); 2653 } 2654 2655 #undef __FUNCT__ 2656 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2657 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2658 { 2659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2660 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2661 MatScalar *aa = a->a; 2662 2663 PetscFunctionBegin; 2664 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2665 PetscFunctionReturn(0); 2666 } 2667 2668 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2669 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2672 /* 2673 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2674 */ 2675 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2676 { 2677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2678 PetscErrorCode ierr; 2679 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2680 PetscInt nz = a->i[m],row,*jj,mr,col; 2681 2682 PetscFunctionBegin; 2683 *nn = n; 2684 if (!ia) PetscFunctionReturn(0); 2685 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2686 else { 2687 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2688 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2689 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2690 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2691 jj = a->j; 2692 for (i=0; i<nz; i++) { 2693 collengths[jj[i]]++; 2694 } 2695 cia[0] = oshift; 2696 for (i=0; i<n; i++) { 2697 cia[i+1] = cia[i] + collengths[i]; 2698 } 2699 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2700 jj = a->j; 2701 for (row=0; row<m; row++) { 2702 mr = a->i[row+1] - a->i[row]; 2703 for (i=0; i<mr; i++) { 2704 col = *jj++; 2705 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2706 } 2707 } 2708 ierr = PetscFree(collengths);CHKERRQ(ierr); 2709 *ia = cia; *ja = cja; 2710 } 2711 PetscFunctionReturn(0); 2712 } 2713 2714 #undef __FUNCT__ 2715 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2716 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2717 { 2718 PetscErrorCode ierr; 2719 2720 PetscFunctionBegin; 2721 if (!ia) PetscFunctionReturn(0); 2722 ierr = PetscFree(*ia);CHKERRQ(ierr); 2723 ierr = PetscFree(*ja);CHKERRQ(ierr); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2729 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2730 { 2731 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2732 PetscErrorCode ierr; 2733 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2734 PetscScalar dx,*y,*xx,*w3_array; 2735 PetscScalar *vscale_array; 2736 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2737 Vec w1=coloring->w1,w2=coloring->w2,w3; 2738 void *fctx = coloring->fctx; 2739 PetscBool flg = PETSC_FALSE; 2740 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2741 Vec x1_tmp; 2742 2743 PetscFunctionBegin; 2744 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2745 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2746 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2747 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2748 2749 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2750 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2751 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2752 if (flg) { 2753 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2754 } else { 2755 PetscBool assembled; 2756 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2757 if (assembled) { 2758 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2759 } 2760 } 2761 2762 x1_tmp = x1; 2763 if (!coloring->vscale){ 2764 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2765 } 2766 2767 /* 2768 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2769 coloring->F for the coarser grids from the finest 2770 */ 2771 if (coloring->F) { 2772 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2773 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2774 if (m1 != m2) { 2775 coloring->F = 0; 2776 } 2777 } 2778 2779 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2780 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2781 } 2782 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2783 2784 /* Set w1 = F(x1) */ 2785 if (coloring->F) { 2786 w1 = coloring->F; /* use already computed value of function */ 2787 coloring->F = 0; 2788 } else { 2789 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2790 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2791 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2792 } 2793 2794 if (!coloring->w3) { 2795 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2796 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2797 } 2798 w3 = coloring->w3; 2799 2800 CHKMEMQ; 2801 /* Compute all the local scale factors, including ghost points */ 2802 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2803 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2804 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2805 if (ctype == IS_COLORING_GHOSTED){ 2806 col_start = 0; col_end = N; 2807 } else if (ctype == IS_COLORING_GLOBAL){ 2808 xx = xx - start; 2809 vscale_array = vscale_array - start; 2810 col_start = start; col_end = N + start; 2811 } CHKMEMQ; 2812 for (col=col_start; col<col_end; col++){ 2813 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2814 if (coloring->htype[0] == 'w') { 2815 dx = 1.0 + unorm; 2816 } else { 2817 dx = xx[col]; 2818 } 2819 if (dx == 0.0) dx = 1.0; 2820 #if !defined(PETSC_USE_COMPLEX) 2821 if (dx < umin && dx >= 0.0) dx = umin; 2822 else if (dx < 0.0 && dx > -umin) dx = -umin; 2823 #else 2824 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2825 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2826 #endif 2827 dx *= epsilon; 2828 vscale_array[col] = 1.0/dx; 2829 } CHKMEMQ; 2830 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2831 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2832 if (ctype == IS_COLORING_GLOBAL){ 2833 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2834 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2835 } 2836 CHKMEMQ; 2837 if (coloring->vscaleforrow) { 2838 vscaleforrow = coloring->vscaleforrow; 2839 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2840 2841 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2842 /* 2843 Loop over each color 2844 */ 2845 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2846 for (k=0; k<coloring->ncolors; k++) { 2847 coloring->currentcolor = k; 2848 for (i=0; i<bs; i++) { 2849 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2850 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2851 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2852 /* 2853 Loop over each column associated with color 2854 adding the perturbation to the vector w3. 2855 */ 2856 for (l=0; l<coloring->ncolumns[k]; l++) { 2857 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2858 if (coloring->htype[0] == 'w') { 2859 dx = 1.0 + unorm; 2860 } else { 2861 dx = xx[col]; 2862 } 2863 if (dx == 0.0) dx = 1.0; 2864 #if !defined(PETSC_USE_COMPLEX) 2865 if (dx < umin && dx >= 0.0) dx = umin; 2866 else if (dx < 0.0 && dx > -umin) dx = -umin; 2867 #else 2868 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2869 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2870 #endif 2871 dx *= epsilon; 2872 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2873 w3_array[col] += dx; 2874 } 2875 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2876 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2877 2878 /* 2879 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2880 w2 = F(x1 + dx) - F(x1) 2881 */ 2882 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2883 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2884 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2885 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2886 2887 /* 2888 Loop over rows of vector, putting results into Jacobian matrix 2889 */ 2890 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2891 for (l=0; l<coloring->nrows[k]; l++) { 2892 row = bs*coloring->rows[k][l]; /* local row index */ 2893 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2894 for (j=0; j<bs; j++) { 2895 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2896 srows[j] = row + start + j; 2897 } 2898 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2899 } 2900 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2901 } 2902 } /* endof for each color */ 2903 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2904 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2905 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2906 ierr = PetscFree(srows);CHKERRQ(ierr); 2907 2908 coloring->currentcolor = -1; 2909 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2910 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2911 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2912 PetscFunctionReturn(0); 2913 } 2914 2915 /* -------------------------------------------------------------------*/ 2916 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2917 MatGetRow_SeqBAIJ, 2918 MatRestoreRow_SeqBAIJ, 2919 MatMult_SeqBAIJ_N, 2920 /* 4*/ MatMultAdd_SeqBAIJ_N, 2921 MatMultTranspose_SeqBAIJ, 2922 MatMultTransposeAdd_SeqBAIJ, 2923 0, 2924 0, 2925 0, 2926 /*10*/ 0, 2927 MatLUFactor_SeqBAIJ, 2928 0, 2929 0, 2930 MatTranspose_SeqBAIJ, 2931 /*15*/ MatGetInfo_SeqBAIJ, 2932 MatEqual_SeqBAIJ, 2933 MatGetDiagonal_SeqBAIJ, 2934 MatDiagonalScale_SeqBAIJ, 2935 MatNorm_SeqBAIJ, 2936 /*20*/ 0, 2937 MatAssemblyEnd_SeqBAIJ, 2938 MatSetOption_SeqBAIJ, 2939 MatZeroEntries_SeqBAIJ, 2940 /*24*/ MatZeroRows_SeqBAIJ, 2941 0, 2942 0, 2943 0, 2944 0, 2945 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2946 0, 2947 0, 2948 MatGetArray_SeqBAIJ, 2949 MatRestoreArray_SeqBAIJ, 2950 /*34*/ MatDuplicate_SeqBAIJ, 2951 0, 2952 0, 2953 MatILUFactor_SeqBAIJ, 2954 0, 2955 /*39*/ MatAXPY_SeqBAIJ, 2956 MatGetSubMatrices_SeqBAIJ, 2957 MatIncreaseOverlap_SeqBAIJ, 2958 MatGetValues_SeqBAIJ, 2959 MatCopy_SeqBAIJ, 2960 /*44*/ 0, 2961 MatScale_SeqBAIJ, 2962 0, 2963 0, 2964 MatZeroRowsColumns_SeqBAIJ, 2965 /*49*/ MatSetBlockSize_SeqBAIJ, 2966 MatGetRowIJ_SeqBAIJ, 2967 MatRestoreRowIJ_SeqBAIJ, 2968 MatGetColumnIJ_SeqBAIJ, 2969 MatRestoreColumnIJ_SeqBAIJ, 2970 /*54*/ MatFDColoringCreate_SeqAIJ, 2971 0, 2972 0, 2973 0, 2974 MatSetValuesBlocked_SeqBAIJ, 2975 /*59*/ MatGetSubMatrix_SeqBAIJ, 2976 MatDestroy_SeqBAIJ, 2977 MatView_SeqBAIJ, 2978 0, 2979 0, 2980 /*64*/ 0, 2981 0, 2982 0, 2983 0, 2984 0, 2985 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2986 0, 2987 MatConvert_Basic, 2988 0, 2989 0, 2990 /*74*/ 0, 2991 MatFDColoringApply_BAIJ, 2992 0, 2993 0, 2994 0, 2995 /*79*/ 0, 2996 0, 2997 0, 2998 0, 2999 MatLoad_SeqBAIJ, 3000 /*84*/ 0, 3001 0, 3002 0, 3003 0, 3004 0, 3005 /*89*/ 0, 3006 0, 3007 0, 3008 0, 3009 0, 3010 /*94*/ 0, 3011 0, 3012 0, 3013 0, 3014 0, 3015 /*99*/0, 3016 0, 3017 0, 3018 0, 3019 0, 3020 /*104*/0, 3021 MatRealPart_SeqBAIJ, 3022 MatImaginaryPart_SeqBAIJ, 3023 0, 3024 0, 3025 /*109*/0, 3026 0, 3027 0, 3028 0, 3029 MatMissingDiagonal_SeqBAIJ, 3030 /*114*/0, 3031 0, 3032 0, 3033 0, 3034 0, 3035 /*119*/0, 3036 0, 3037 MatMultHermitianTranspose_SeqBAIJ, 3038 MatMultHermitianTransposeAdd_SeqBAIJ, 3039 0, 3040 }; 3041 3042 EXTERN_C_BEGIN 3043 #undef __FUNCT__ 3044 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3045 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3046 { 3047 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3048 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3049 PetscErrorCode ierr; 3050 3051 PetscFunctionBegin; 3052 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3053 3054 /* allocate space for values if not already there */ 3055 if (!aij->saved_values) { 3056 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3057 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3058 } 3059 3060 /* copy values over */ 3061 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 EXTERN_C_BEGIN 3067 #undef __FUNCT__ 3068 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3069 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3070 { 3071 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3072 PetscErrorCode ierr; 3073 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3074 3075 PetscFunctionBegin; 3076 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3077 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3078 3079 /* copy values over */ 3080 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 EXTERN_C_END 3084 3085 EXTERN_C_BEGIN 3086 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3087 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3088 EXTERN_C_END 3089 3090 EXTERN_C_BEGIN 3091 #undef __FUNCT__ 3092 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3093 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3094 { 3095 Mat_SeqBAIJ *b; 3096 PetscErrorCode ierr; 3097 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3098 PetscBool flg,skipallocation = PETSC_FALSE; 3099 3100 PetscFunctionBegin; 3101 3102 if (nz == MAT_SKIP_ALLOCATION) { 3103 skipallocation = PETSC_TRUE; 3104 nz = 0; 3105 } 3106 3107 if (bs < 0) { 3108 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3109 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3110 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3111 bs = PetscAbs(bs); 3112 } 3113 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3114 bs = newbs; 3115 3116 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3117 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3118 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3119 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3120 3121 B->preallocated = PETSC_TRUE; 3122 3123 mbs = B->rmap->n/bs; 3124 nbs = B->cmap->n/bs; 3125 bs2 = bs*bs; 3126 3127 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3128 3129 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3130 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3131 if (nnz) { 3132 for (i=0; i<mbs; i++) { 3133 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3134 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3135 } 3136 } 3137 3138 b = (Mat_SeqBAIJ*)B->data; 3139 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3140 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3141 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3142 3143 if (!flg) { 3144 switch (bs) { 3145 case 1: 3146 B->ops->mult = MatMult_SeqBAIJ_1; 3147 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3148 B->ops->sor = MatSOR_SeqBAIJ_1; 3149 break; 3150 case 2: 3151 B->ops->mult = MatMult_SeqBAIJ_2; 3152 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3153 B->ops->sor = MatSOR_SeqBAIJ_2; 3154 break; 3155 case 3: 3156 B->ops->mult = MatMult_SeqBAIJ_3; 3157 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3158 B->ops->sor = MatSOR_SeqBAIJ_3; 3159 break; 3160 case 4: 3161 B->ops->mult = MatMult_SeqBAIJ_4; 3162 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3163 B->ops->sor = MatSOR_SeqBAIJ_4; 3164 break; 3165 case 5: 3166 B->ops->mult = MatMult_SeqBAIJ_5; 3167 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3168 B->ops->sor = MatSOR_SeqBAIJ_5; 3169 break; 3170 case 6: 3171 B->ops->mult = MatMult_SeqBAIJ_6; 3172 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3173 B->ops->sor = MatSOR_SeqBAIJ_6; 3174 break; 3175 case 7: 3176 B->ops->mult = MatMult_SeqBAIJ_7; 3177 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3178 B->ops->sor = MatSOR_SeqBAIJ_7; 3179 break; 3180 case 15: 3181 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3182 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3183 B->ops->sor = MatSOR_SeqBAIJ_N; 3184 break; 3185 default: 3186 B->ops->mult = MatMult_SeqBAIJ_N; 3187 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3188 B->ops->sor = MatSOR_SeqBAIJ_N; 3189 break; 3190 } 3191 } 3192 B->rmap->bs = bs; 3193 b->mbs = mbs; 3194 b->nbs = nbs; 3195 if (!skipallocation) { 3196 if (!b->imax) { 3197 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3198 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3199 b->free_imax_ilen = PETSC_TRUE; 3200 } 3201 /* b->ilen will count nonzeros in each block row so far. */ 3202 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3203 if (!nnz) { 3204 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3205 else if (nz < 0) nz = 1; 3206 for (i=0; i<mbs; i++) b->imax[i] = nz; 3207 nz = nz*mbs; 3208 } else { 3209 nz = 0; 3210 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3211 } 3212 3213 /* allocate the matrix space */ 3214 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3215 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3216 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3217 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3218 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3219 b->singlemalloc = PETSC_TRUE; 3220 b->i[0] = 0; 3221 for (i=1; i<mbs+1; i++) { 3222 b->i[i] = b->i[i-1] + b->imax[i-1]; 3223 } 3224 b->free_a = PETSC_TRUE; 3225 b->free_ij = PETSC_TRUE; 3226 } else { 3227 b->free_a = PETSC_FALSE; 3228 b->free_ij = PETSC_FALSE; 3229 } 3230 3231 B->rmap->bs = bs; 3232 b->bs2 = bs2; 3233 b->mbs = mbs; 3234 b->nz = 0; 3235 b->maxnz = nz; 3236 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3237 PetscFunctionReturn(0); 3238 } 3239 EXTERN_C_END 3240 3241 EXTERN_C_BEGIN 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3244 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3245 { 3246 PetscInt i,m,nz,nz_max=0,*nnz; 3247 PetscScalar *values=0; 3248 PetscErrorCode ierr; 3249 3250 PetscFunctionBegin; 3251 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3252 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3253 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3254 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3255 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3256 m = B->rmap->n/bs; 3257 3258 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3259 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3260 for(i=0; i<m; i++) { 3261 nz = ii[i+1]- ii[i]; 3262 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3263 nz_max = PetscMax(nz_max, nz); 3264 nnz[i] = nz; 3265 } 3266 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3267 ierr = PetscFree(nnz);CHKERRQ(ierr); 3268 3269 values = (PetscScalar*)V; 3270 if (!values) { 3271 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3272 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3273 } 3274 for (i=0; i<m; i++) { 3275 PetscInt ncols = ii[i+1] - ii[i]; 3276 const PetscInt *icols = jj + ii[i]; 3277 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3278 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3279 } 3280 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3281 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3282 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3283 3284 PetscFunctionReturn(0); 3285 } 3286 EXTERN_C_END 3287 3288 3289 EXTERN_C_BEGIN 3290 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3291 #if defined(PETSC_HAVE_MUMPS) 3292 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3293 #endif 3294 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3295 EXTERN_C_END 3296 3297 /*MC 3298 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3299 block sparse compressed row format. 3300 3301 Options Database Keys: 3302 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3303 3304 Level: beginner 3305 3306 .seealso: MatCreateSeqBAIJ() 3307 M*/ 3308 3309 3310 EXTERN_C_BEGIN 3311 #undef __FUNCT__ 3312 #define __FUNCT__ "MatCreate_SeqBAIJ" 3313 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3314 { 3315 PetscErrorCode ierr; 3316 PetscMPIInt size; 3317 Mat_SeqBAIJ *b; 3318 3319 PetscFunctionBegin; 3320 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3321 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3322 3323 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3324 B->data = (void*)b; 3325 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3326 b->row = 0; 3327 b->col = 0; 3328 b->icol = 0; 3329 b->reallocs = 0; 3330 b->saved_values = 0; 3331 3332 b->roworiented = PETSC_TRUE; 3333 b->nonew = 0; 3334 b->diag = 0; 3335 b->solve_work = 0; 3336 b->mult_work = 0; 3337 B->spptr = 0; 3338 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3339 b->keepnonzeropattern = PETSC_FALSE; 3340 b->xtoy = 0; 3341 b->XtoY = 0; 3342 B->same_nonzero = PETSC_FALSE; 3343 3344 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3345 "MatGetFactorAvailable_seqbaij_petsc", 3346 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3347 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3348 "MatGetFactor_seqbaij_petsc", 3349 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3350 #if defined(PETSC_HAVE_MUMPS) 3351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3352 #endif 3353 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3354 "MatInvertBlockDiagonal_SeqBAIJ", 3355 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3356 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3357 "MatStoreValues_SeqBAIJ", 3358 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3359 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3360 "MatRetrieveValues_SeqBAIJ", 3361 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3362 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3363 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3364 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3365 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3366 "MatConvert_SeqBAIJ_SeqAIJ", 3367 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3368 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3369 "MatConvert_SeqBAIJ_SeqSBAIJ", 3370 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3371 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3372 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3373 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3374 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3375 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3376 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3377 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3378 PetscFunctionReturn(0); 3379 } 3380 EXTERN_C_END 3381 3382 #undef __FUNCT__ 3383 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3384 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3385 { 3386 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3387 PetscErrorCode ierr; 3388 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3389 3390 PetscFunctionBegin; 3391 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3392 3393 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3394 c->imax = a->imax; 3395 c->ilen = a->ilen; 3396 c->free_imax_ilen = PETSC_FALSE; 3397 } else { 3398 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3399 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3400 for (i=0; i<mbs; i++) { 3401 c->imax[i] = a->imax[i]; 3402 c->ilen[i] = a->ilen[i]; 3403 } 3404 c->free_imax_ilen = PETSC_TRUE; 3405 } 3406 3407 /* allocate the matrix space */ 3408 if (mallocmatspace){ 3409 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3410 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3411 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3412 c->singlemalloc = PETSC_FALSE; 3413 c->free_ij = PETSC_FALSE; 3414 c->i = a->i; 3415 c->j = a->j; 3416 c->parent = A; 3417 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3418 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3419 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3420 } else { 3421 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3422 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3423 c->singlemalloc = PETSC_TRUE; 3424 c->free_ij = PETSC_TRUE; 3425 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3426 if (mbs > 0) { 3427 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3428 if (cpvalues == MAT_COPY_VALUES) { 3429 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3430 } else { 3431 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3432 } 3433 } 3434 } 3435 } 3436 3437 c->roworiented = a->roworiented; 3438 c->nonew = a->nonew; 3439 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3440 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3441 c->bs2 = a->bs2; 3442 c->mbs = a->mbs; 3443 c->nbs = a->nbs; 3444 3445 if (a->diag) { 3446 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3447 c->diag = a->diag; 3448 c->free_diag = PETSC_FALSE; 3449 } else { 3450 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3451 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3452 for (i=0; i<mbs; i++) { 3453 c->diag[i] = a->diag[i]; 3454 } 3455 c->free_diag = PETSC_TRUE; 3456 } 3457 } else c->diag = 0; 3458 c->nz = a->nz; 3459 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3460 c->solve_work = 0; 3461 c->mult_work = 0; 3462 c->free_a = PETSC_TRUE; 3463 c->free_ij = PETSC_TRUE; 3464 C->preallocated = PETSC_TRUE; 3465 C->assembled = PETSC_TRUE; 3466 3467 c->compressedrow.use = a->compressedrow.use; 3468 c->compressedrow.nrows = a->compressedrow.nrows; 3469 c->compressedrow.check = a->compressedrow.check; 3470 if (a->compressedrow.use){ 3471 i = a->compressedrow.nrows; 3472 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3473 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3474 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3475 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3476 } else { 3477 c->compressedrow.use = PETSC_FALSE; 3478 c->compressedrow.i = PETSC_NULL; 3479 c->compressedrow.rindex = PETSC_NULL; 3480 } 3481 C->same_nonzero = A->same_nonzero; 3482 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3483 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3484 PetscFunctionReturn(0); 3485 } 3486 3487 #undef __FUNCT__ 3488 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3489 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3490 { 3491 PetscErrorCode ierr; 3492 3493 PetscFunctionBegin; 3494 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3495 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3496 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3497 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3498 PetscFunctionReturn(0); 3499 } 3500 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatLoad_SeqBAIJ" 3503 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3504 { 3505 Mat_SeqBAIJ *a; 3506 PetscErrorCode ierr; 3507 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3508 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3509 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3510 PetscInt *masked,nmask,tmp,bs2,ishift; 3511 PetscMPIInt size; 3512 int fd; 3513 PetscScalar *aa; 3514 MPI_Comm comm = ((PetscObject)viewer)->comm; 3515 3516 PetscFunctionBegin; 3517 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3518 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3519 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3520 bs2 = bs*bs; 3521 3522 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3523 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3524 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3525 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3526 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3527 M = header[1]; N = header[2]; nz = header[3]; 3528 3529 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3530 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3531 3532 /* 3533 This code adds extra rows to make sure the number of rows is 3534 divisible by the blocksize 3535 */ 3536 mbs = M/bs; 3537 extra_rows = bs - M + bs*(mbs); 3538 if (extra_rows == bs) extra_rows = 0; 3539 else mbs++; 3540 if (extra_rows) { 3541 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3542 } 3543 3544 /* Set global sizes if not already set */ 3545 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3546 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3547 } else { /* Check if the matrix global sizes are correct */ 3548 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3549 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3550 } 3551 3552 /* read in row lengths */ 3553 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3554 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3555 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3556 3557 /* read in column indices */ 3558 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3559 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3560 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3561 3562 /* loop over row lengths determining block row lengths */ 3563 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3564 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3565 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3566 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3567 rowcount = 0; 3568 nzcount = 0; 3569 for (i=0; i<mbs; i++) { 3570 nmask = 0; 3571 for (j=0; j<bs; j++) { 3572 kmax = rowlengths[rowcount]; 3573 for (k=0; k<kmax; k++) { 3574 tmp = jj[nzcount++]/bs; 3575 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3576 } 3577 rowcount++; 3578 } 3579 browlengths[i] += nmask; 3580 /* zero out the mask elements we set */ 3581 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3582 } 3583 3584 /* Do preallocation */ 3585 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3586 a = (Mat_SeqBAIJ*)newmat->data; 3587 3588 /* set matrix "i" values */ 3589 a->i[0] = 0; 3590 for (i=1; i<= mbs; i++) { 3591 a->i[i] = a->i[i-1] + browlengths[i-1]; 3592 a->ilen[i-1] = browlengths[i-1]; 3593 } 3594 a->nz = 0; 3595 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3596 3597 /* read in nonzero values */ 3598 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3599 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3600 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3601 3602 /* set "a" and "j" values into matrix */ 3603 nzcount = 0; jcount = 0; 3604 for (i=0; i<mbs; i++) { 3605 nzcountb = nzcount; 3606 nmask = 0; 3607 for (j=0; j<bs; j++) { 3608 kmax = rowlengths[i*bs+j]; 3609 for (k=0; k<kmax; k++) { 3610 tmp = jj[nzcount++]/bs; 3611 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3612 } 3613 } 3614 /* sort the masked values */ 3615 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3616 3617 /* set "j" values into matrix */ 3618 maskcount = 1; 3619 for (j=0; j<nmask; j++) { 3620 a->j[jcount++] = masked[j]; 3621 mask[masked[j]] = maskcount++; 3622 } 3623 /* set "a" values into matrix */ 3624 ishift = bs2*a->i[i]; 3625 for (j=0; j<bs; j++) { 3626 kmax = rowlengths[i*bs+j]; 3627 for (k=0; k<kmax; k++) { 3628 tmp = jj[nzcountb]/bs ; 3629 block = mask[tmp] - 1; 3630 point = jj[nzcountb] - bs*tmp; 3631 idx = ishift + bs2*block + j + bs*point; 3632 a->a[idx] = (MatScalar)aa[nzcountb++]; 3633 } 3634 } 3635 /* zero out the mask elements we set */ 3636 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3637 } 3638 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3639 3640 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3641 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3642 ierr = PetscFree(aa);CHKERRQ(ierr); 3643 ierr = PetscFree(jj);CHKERRQ(ierr); 3644 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3645 3646 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3649 PetscFunctionReturn(0); 3650 } 3651 3652 #undef __FUNCT__ 3653 #define __FUNCT__ "MatCreateSeqBAIJ" 3654 /*@C 3655 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3656 compressed row) format. For good matrix assembly performance the 3657 user should preallocate the matrix storage by setting the parameter nz 3658 (or the array nnz). By setting these parameters accurately, performance 3659 during matrix assembly can be increased by more than a factor of 50. 3660 3661 Collective on MPI_Comm 3662 3663 Input Parameters: 3664 + comm - MPI communicator, set to PETSC_COMM_SELF 3665 . bs - size of block 3666 . m - number of rows 3667 . n - number of columns 3668 . nz - number of nonzero blocks per block row (same for all rows) 3669 - nnz - array containing the number of nonzero blocks in the various block rows 3670 (possibly different for each block row) or PETSC_NULL 3671 3672 Output Parameter: 3673 . A - the matrix 3674 3675 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3676 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3677 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3678 3679 Options Database Keys: 3680 . -mat_no_unroll - uses code that does not unroll the loops in the 3681 block calculations (much slower) 3682 . -mat_block_size - size of the blocks to use 3683 3684 Level: intermediate 3685 3686 Notes: 3687 The number of rows and columns must be divisible by blocksize. 3688 3689 If the nnz parameter is given then the nz parameter is ignored 3690 3691 A nonzero block is any block that as 1 or more nonzeros in it 3692 3693 The block AIJ format is fully compatible with standard Fortran 77 3694 storage. That is, the stored row and column indices can begin at 3695 either one (as in Fortran) or zero. See the users' manual for details. 3696 3697 Specify the preallocated storage with either nz or nnz (not both). 3698 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3699 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3700 matrices. 3701 3702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3703 @*/ 3704 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3705 { 3706 PetscErrorCode ierr; 3707 3708 PetscFunctionBegin; 3709 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3710 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3711 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3712 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3713 PetscFunctionReturn(0); 3714 } 3715 3716 #undef __FUNCT__ 3717 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3718 /*@C 3719 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3720 per row in the matrix. For good matrix assembly performance the 3721 user should preallocate the matrix storage by setting the parameter nz 3722 (or the array nnz). By setting these parameters accurately, performance 3723 during matrix assembly can be increased by more than a factor of 50. 3724 3725 Collective on MPI_Comm 3726 3727 Input Parameters: 3728 + A - the matrix 3729 . bs - size of block 3730 . nz - number of block nonzeros per block row (same for all rows) 3731 - nnz - array containing the number of block nonzeros in the various block rows 3732 (possibly different for each block row) or PETSC_NULL 3733 3734 Options Database Keys: 3735 . -mat_no_unroll - uses code that does not unroll the loops in the 3736 block calculations (much slower) 3737 . -mat_block_size - size of the blocks to use 3738 3739 Level: intermediate 3740 3741 Notes: 3742 If the nnz parameter is given then the nz parameter is ignored 3743 3744 You can call MatGetInfo() to get information on how effective the preallocation was; 3745 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3746 You can also run with the option -info and look for messages with the string 3747 malloc in them to see if additional memory allocation was needed. 3748 3749 The block AIJ format is fully compatible with standard Fortran 77 3750 storage. That is, the stored row and column indices can begin at 3751 either one (as in Fortran) or zero. See the users' manual for details. 3752 3753 Specify the preallocated storage with either nz or nnz (not both). 3754 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3755 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3756 3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3758 @*/ 3759 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3760 { 3761 PetscErrorCode ierr; 3762 3763 PetscFunctionBegin; 3764 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3765 PetscFunctionReturn(0); 3766 } 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3770 /*@C 3771 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3772 (the default sequential PETSc format). 3773 3774 Collective on MPI_Comm 3775 3776 Input Parameters: 3777 + A - the matrix 3778 . i - the indices into j for the start of each local row (starts with zero) 3779 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3780 - v - optional values in the matrix 3781 3782 Level: developer 3783 3784 .keywords: matrix, aij, compressed row, sparse 3785 3786 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3787 @*/ 3788 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3789 { 3790 PetscErrorCode ierr; 3791 3792 PetscFunctionBegin; 3793 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3794 PetscFunctionReturn(0); 3795 } 3796 3797 3798 #undef __FUNCT__ 3799 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3800 /*@ 3801 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3802 3803 Collective on MPI_Comm 3804 3805 Input Parameters: 3806 + comm - must be an MPI communicator of size 1 3807 . bs - size of block 3808 . m - number of rows 3809 . n - number of columns 3810 . i - row indices 3811 . j - column indices 3812 - a - matrix values 3813 3814 Output Parameter: 3815 . mat - the matrix 3816 3817 Level: advanced 3818 3819 Notes: 3820 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3821 once the matrix is destroyed 3822 3823 You cannot set new nonzero locations into this matrix, that will generate an error. 3824 3825 The i and j indices are 0 based 3826 3827 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3828 3829 3830 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3831 3832 @*/ 3833 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3834 { 3835 PetscErrorCode ierr; 3836 PetscInt ii; 3837 Mat_SeqBAIJ *baij; 3838 3839 PetscFunctionBegin; 3840 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3841 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3842 3843 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3844 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3845 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3846 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3847 baij = (Mat_SeqBAIJ*)(*mat)->data; 3848 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3849 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3850 3851 baij->i = i; 3852 baij->j = j; 3853 baij->a = a; 3854 baij->singlemalloc = PETSC_FALSE; 3855 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3856 baij->free_a = PETSC_FALSE; 3857 baij->free_ij = PETSC_FALSE; 3858 3859 for (ii=0; ii<m; ii++) { 3860 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3861 #if defined(PETSC_USE_DEBUG) 3862 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3863 #endif 3864 } 3865 #if defined(PETSC_USE_DEBUG) 3866 for (ii=0; ii<baij->i[m]; ii++) { 3867 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3868 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3869 } 3870 #endif 3871 3872 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3873 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3874 PetscFunctionReturn(0); 3875 } 3876