1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 12 /*@ 13 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 14 15 Collective on Mat 16 17 Input Parameters: 18 . mat - the matrix 19 20 Level: advanced 21 @*/ 22 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 23 { 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 28 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 29 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 30 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 31 PetscFunctionReturn(0); 32 } 33 34 EXTERN_C_BEGIN 35 #undef __FUNCT__ 36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 37 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 40 PetscErrorCode ierr; 41 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 42 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 43 PetscReal shift = 0.0; 44 45 PetscFunctionBegin; 46 if (a->idiagvalid) PetscFunctionReturn(0); 47 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 48 diag_offset = a->diag; 49 if (!a->idiag) { 50 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 51 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 52 } 53 diag = a->idiag; 54 mdiag = a->idiag+bs2*mbs; 55 /* factor and invert each block */ 56 switch (bs){ 57 case 1: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 1*diag_offset[i]; 60 diag[0] = odiag[0]; 61 mdiag[0] = odiag[0]; 62 diag[0] = 1.0 / (diag[0] + shift); 63 diag += 1; 64 mdiag += 1; 65 } 66 break; 67 case 2: 68 for (i=0; i<mbs; i++) { 69 odiag = v + 4*diag_offset[i]; 70 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 71 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 72 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 73 diag += 4; 74 mdiag += 4; 75 } 76 break; 77 case 3: 78 for (i=0; i<mbs; i++) { 79 odiag = v + 9*diag_offset[i]; 80 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 81 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 82 diag[8] = odiag[8]; 83 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 84 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 85 mdiag[8] = odiag[8]; 86 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 87 diag += 9; 88 mdiag += 9; 89 } 90 break; 91 case 4: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 16*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 97 diag += 16; 98 mdiag += 16; 99 } 100 break; 101 case 5: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 25*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 107 diag += 25; 108 mdiag += 25; 109 } 110 break; 111 case 6: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 36*diag_offset[i]; 114 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 127 diag += 49; 128 mdiag += 49; 129 } 130 break; 131 default: 132 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 133 for (i=0; i<mbs; i++) { 134 odiag = v + bs2*diag_offset[i]; 135 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 EXTERN_C_END 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 151 { 152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 153 PetscScalar *x,x1,s1; 154 const PetscScalar *b; 155 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,j; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 its = its*lits; 163 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 164 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 165 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 166 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 167 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 168 169 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 170 171 diag = a->diag; 172 idiag = a->idiag; 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 178 x[0] = b[0]*idiag[0]; 179 i2 = 1; 180 idiag += 1; 181 for (i=1; i<m; i++) { 182 v = aa + ai[i]; 183 vi = aj + ai[i]; 184 nz = diag[i] - ai[i]; 185 s1 = b[i2]; 186 for (j=0; j<nz; j++) { 187 s1 -= v[j]*x[vi[j]]; 188 } 189 x[i2] = idiag[0]*s1; 190 idiag += 1; 191 i2 += 1; 192 } 193 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 194 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 195 } 196 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 197 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 198 i2 = 0; 199 mdiag = a->idiag+a->mbs; 200 for (i=0; i<m; i++) { 201 x1 = x[i2]; 202 x[i2] = mdiag[0]*x1; 203 mdiag += 1; 204 i2 += 1; 205 } 206 ierr = PetscLogFlops(m);CHKERRQ(ierr); 207 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 208 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 209 } 210 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 211 idiag = a->idiag+a->mbs - 1; 212 i2 = m - 1; 213 x1 = x[i2]; 214 x[i2] = idiag[0]*x1; 215 idiag -= 1; 216 i2 -= 1; 217 for (i=m-2; i>=0; i--) { 218 v = aa + (diag[i]+1); 219 vi = aj + diag[i] + 1; 220 nz = ai[i+1] - diag[i] - 1; 221 s1 = x[i2]; 222 for (j=0; j<nz; j++) { 223 s1 -= v[j]*x[vi[j]]; 224 } 225 x[i2] = idiag[0]*s1; 226 idiag -= 1; 227 i2 -= 1; 228 } 229 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 230 } 231 } else { 232 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 233 } 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 PetscScalar *x,x1,x2,s1,s2; 245 const PetscScalar *b; 246 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 247 PetscErrorCode ierr; 248 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 249 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 250 251 PetscFunctionBegin; 252 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 253 its = its*lits; 254 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 255 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 256 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 257 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 258 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 259 260 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 261 262 diag = a->diag; 263 idiag = a->idiag; 264 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 266 267 if (flag & SOR_ZERO_INITIAL_GUESS) { 268 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 269 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 270 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 271 i2 = 2; 272 idiag += 4; 273 for (i=1; i<m; i++) { 274 v = aa + 4*ai[i]; 275 vi = aj + ai[i]; 276 nz = diag[i] - ai[i]; 277 s1 = b[i2]; s2 = b[i2+1]; 278 for (j=0; j<nz; j++) { 279 idx = 2*vi[j]; 280 it = 4*j; 281 x1 = x[idx]; x2 = x[1+idx]; 282 s1 -= v[it]*x1 + v[it+2]*x2; 283 s2 -= v[it+1]*x1 + v[it+3]*x2; 284 } 285 x[i2] = idiag[0]*s1 + idiag[2]*s2; 286 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 287 idiag += 4; 288 i2 += 2; 289 } 290 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 291 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 292 } 293 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 294 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 295 i2 = 0; 296 mdiag = a->idiag+4*a->mbs; 297 for (i=0; i<m; i++) { 298 x1 = x[i2]; x2 = x[i2+1]; 299 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 300 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 301 mdiag += 4; 302 i2 += 2; 303 } 304 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 305 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 306 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 307 } 308 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 309 idiag = a->idiag+4*a->mbs - 4; 310 i2 = 2*m - 2; 311 x1 = x[i2]; x2 = x[i2+1]; 312 x[i2] = idiag[0]*x1 + idiag[2]*x2; 313 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 314 idiag -= 4; 315 i2 -= 2; 316 for (i=m-2; i>=0; i--) { 317 v = aa + 4*(diag[i]+1); 318 vi = aj + diag[i] + 1; 319 nz = ai[i+1] - diag[i] - 1; 320 s1 = x[i2]; s2 = x[i2+1]; 321 for (j=0; j<nz; j++) { 322 idx = 2*vi[j]; 323 it = 4*j; 324 x1 = x[idx]; x2 = x[1+idx]; 325 s1 -= v[it]*x1 + v[it+2]*x2; 326 s2 -= v[it+1]*x1 + v[it+3]*x2; 327 } 328 x[i2] = idiag[0]*s1 + idiag[2]*s2; 329 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 330 idiag -= 4; 331 i2 -= 2; 332 } 333 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 334 } 335 } else { 336 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 337 } 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 346 { 347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 348 PetscScalar *x,x1,x2,x3,s1,s2,s3; 349 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 350 const PetscScalar *b; 351 PetscErrorCode ierr; 352 PetscInt m = a->mbs,i,i2,nz,idx; 353 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 354 355 PetscFunctionBegin; 356 its = its*lits; 357 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 358 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 359 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 360 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 361 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 362 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 363 364 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 365 366 diag = a->diag; 367 idiag = a->idiag; 368 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 369 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 370 371 if (flag & SOR_ZERO_INITIAL_GUESS) { 372 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 373 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 374 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 375 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 376 i2 = 3; 377 idiag += 9; 378 for (i=1; i<m; i++) { 379 v = aa + 9*ai[i]; 380 vi = aj + ai[i]; 381 nz = diag[i] - ai[i]; 382 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 383 while (nz--) { 384 idx = 3*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 386 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 387 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 388 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 389 v += 9; 390 } 391 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 392 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 393 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 394 idiag += 9; 395 i2 += 3; 396 } 397 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 398 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 399 } 400 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 401 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 402 i2 = 0; 403 mdiag = a->idiag+9*a->mbs; 404 for (i=0; i<m; i++) { 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 406 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 407 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 408 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 409 mdiag += 9; 410 i2 += 3; 411 } 412 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 413 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 414 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 415 } 416 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 417 idiag = a->idiag+9*a->mbs - 9; 418 i2 = 3*m - 3; 419 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 420 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 421 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 422 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 423 idiag -= 9; 424 i2 -= 3; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 9*(diag[i]+1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 430 while (nz--) { 431 idx = 3*(*vi++); 432 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 433 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 434 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 435 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 436 v += 9; 437 } 438 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 439 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 440 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 441 idiag -= 9; 442 i2 -= 3; 443 } 444 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 445 } 446 } else { 447 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 448 } 449 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 457 { 458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 459 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 460 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 461 const PetscScalar *b; 462 PetscErrorCode ierr; 463 PetscInt m = a->mbs,i,i2,nz,idx; 464 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 465 466 PetscFunctionBegin; 467 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 468 its = its*lits; 469 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 470 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 471 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 472 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 473 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 474 475 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 476 477 diag = a->diag; 478 idiag = a->idiag; 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 481 482 if (flag & SOR_ZERO_INITIAL_GUESS) { 483 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 484 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 485 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 486 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 487 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 488 i2 = 4; 489 idiag += 16; 490 for (i=1; i<m; i++) { 491 v = aa + 16*ai[i]; 492 vi = aj + ai[i]; 493 nz = diag[i] - ai[i]; 494 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 495 while (nz--) { 496 idx = 4*(*vi++); 497 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 498 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 499 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 500 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 501 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 502 v += 16; 503 } 504 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 505 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 506 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 507 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 508 idiag += 16; 509 i2 += 4; 510 } 511 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 512 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 513 } 514 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 515 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 516 i2 = 0; 517 mdiag = a->idiag+16*a->mbs; 518 for (i=0; i<m; i++) { 519 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 520 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 521 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 522 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 523 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 524 mdiag += 16; 525 i2 += 4; 526 } 527 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 528 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 529 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 530 } 531 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 532 idiag = a->idiag+16*a->mbs - 16; 533 i2 = 4*m - 4; 534 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 535 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 536 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 537 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 538 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 539 idiag -= 16; 540 i2 -= 4; 541 for (i=m-2; i>=0; i--) { 542 v = aa + 16*(diag[i]+1); 543 vi = aj + diag[i] + 1; 544 nz = ai[i+1] - diag[i] - 1; 545 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 546 while (nz--) { 547 idx = 4*(*vi++); 548 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 549 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 550 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 551 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 552 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 553 v += 16; 554 } 555 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 556 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 557 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 558 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 559 idiag -= 16; 560 i2 -= 4; 561 } 562 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 563 } 564 } else { 565 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 566 } 567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 575 { 576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 577 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 578 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 579 const PetscScalar *b; 580 PetscErrorCode ierr; 581 PetscInt m = a->mbs,i,i2,nz,idx; 582 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 583 584 PetscFunctionBegin; 585 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 586 its = its*lits; 587 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 588 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 589 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 590 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 591 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 592 593 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 594 595 diag = a->diag; 596 idiag = a->idiag; 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 599 600 if (flag & SOR_ZERO_INITIAL_GUESS) { 601 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 602 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 603 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 604 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 605 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 606 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 607 i2 = 5; 608 idiag += 25; 609 for (i=1; i<m; i++) { 610 v = aa + 25*ai[i]; 611 vi = aj + ai[i]; 612 nz = diag[i] - ai[i]; 613 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 614 while (nz--) { 615 idx = 5*(*vi++); 616 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 617 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 618 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 619 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 620 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 621 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 622 v += 25; 623 } 624 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 625 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 626 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 627 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 628 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 629 idiag += 25; 630 i2 += 5; 631 } 632 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 633 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 634 } 635 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 636 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 637 i2 = 0; 638 mdiag = a->idiag+25*a->mbs; 639 for (i=0; i<m; i++) { 640 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 641 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 642 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 643 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 644 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 645 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 646 mdiag += 25; 647 i2 += 5; 648 } 649 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 650 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 651 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 652 } 653 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 654 idiag = a->idiag+25*a->mbs - 25; 655 i2 = 5*m - 5; 656 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 657 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 658 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 659 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 660 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 661 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 662 idiag -= 25; 663 i2 -= 5; 664 for (i=m-2; i>=0; i--) { 665 v = aa + 25*(diag[i]+1); 666 vi = aj + diag[i] + 1; 667 nz = ai[i+1] - diag[i] - 1; 668 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 672 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 673 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 674 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 675 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 676 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 677 v += 25; 678 } 679 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 680 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 681 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 682 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 683 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 684 idiag -= 25; 685 i2 -= 5; 686 } 687 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 688 } 689 } else { 690 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 691 } 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 700 { 701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 702 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 703 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 704 const PetscScalar *b; 705 PetscErrorCode ierr; 706 PetscInt m = a->mbs,i,i2,nz,idx; 707 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 708 709 PetscFunctionBegin; 710 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 711 its = its*lits; 712 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 713 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 714 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 715 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 716 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 717 718 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 719 720 diag = a->diag; 721 idiag = a->idiag; 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 724 725 if (flag & SOR_ZERO_INITIAL_GUESS) { 726 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 727 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 728 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 729 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 730 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 731 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 732 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 733 i2 = 6; 734 idiag += 36; 735 for (i=1; i<m; i++) { 736 v = aa + 36*ai[i]; 737 vi = aj + ai[i]; 738 nz = diag[i] - ai[i]; 739 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 740 while (nz--) { 741 idx = 6*(*vi++); 742 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 743 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 744 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 745 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 746 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 747 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 748 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 749 v += 36; 750 } 751 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 752 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 753 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 754 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 755 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 756 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 757 idiag += 36; 758 i2 += 6; 759 } 760 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 761 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 762 } 763 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 764 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 765 i2 = 0; 766 mdiag = a->idiag+36*a->mbs; 767 for (i=0; i<m; i++) { 768 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 769 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 770 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 771 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 772 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 773 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 774 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 775 mdiag += 36; 776 i2 += 6; 777 } 778 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 779 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 780 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 781 } 782 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 783 idiag = a->idiag+36*a->mbs - 36; 784 i2 = 6*m - 6; 785 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 786 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 787 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 788 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 789 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 790 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 791 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 792 idiag -= 36; 793 i2 -= 6; 794 for (i=m-2; i>=0; i--) { 795 v = aa + 36*(diag[i]+1); 796 vi = aj + diag[i] + 1; 797 nz = ai[i+1] - diag[i] - 1; 798 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 799 while (nz--) { 800 idx = 6*(*vi++); 801 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 802 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 803 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 804 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 805 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 806 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 807 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 808 v += 36; 809 } 810 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 811 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 812 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 813 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 814 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 815 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 816 idiag -= 36; 817 i2 -= 6; 818 } 819 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 820 } 821 } else { 822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 823 } 824 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 825 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 834 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 835 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 836 const PetscScalar *b; 837 PetscErrorCode ierr; 838 PetscInt m = a->mbs,i,i2,nz,idx; 839 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 840 841 PetscFunctionBegin; 842 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 843 its = its*lits; 844 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 845 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 846 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 847 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 848 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 849 850 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 851 852 diag = a->diag; 853 idiag = a->idiag; 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 856 857 if (flag & SOR_ZERO_INITIAL_GUESS) { 858 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 859 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 860 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 861 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 862 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 863 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 864 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 865 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 866 i2 = 7; 867 idiag += 49; 868 for (i=1; i<m; i++) { 869 v = aa + 49*ai[i]; 870 vi = aj + ai[i]; 871 nz = diag[i] - ai[i]; 872 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 873 while (nz--) { 874 idx = 7*(*vi++); 875 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 876 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 877 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 878 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 879 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 880 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 881 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 882 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 883 v += 49; 884 } 885 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 886 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 887 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 888 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 889 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 890 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 891 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 892 idiag += 49; 893 i2 += 7; 894 } 895 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 896 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 897 } 898 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 899 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 900 i2 = 0; 901 mdiag = a->idiag+49*a->mbs; 902 for (i=0; i<m; i++) { 903 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 904 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 905 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 906 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 907 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 908 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 909 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 910 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 911 mdiag += 49; 912 i2 += 7; 913 } 914 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 915 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 916 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 917 } 918 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 919 idiag = a->idiag+49*a->mbs - 49; 920 i2 = 7*m - 7; 921 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 922 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 923 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 924 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 925 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 926 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 927 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 928 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 929 idiag -= 49; 930 i2 -= 7; 931 for (i=m-2; i>=0; i--) { 932 v = aa + 49*(diag[i]+1); 933 vi = aj + diag[i] + 1; 934 nz = ai[i+1] - diag[i] - 1; 935 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 936 while (nz--) { 937 idx = 7*(*vi++); 938 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 939 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 940 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 941 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 942 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 943 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 944 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 945 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 946 v += 49; 947 } 948 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 949 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 950 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 951 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 952 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 953 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 954 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 955 idiag -= 49; 956 i2 -= 7; 957 } 958 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 959 } 960 } else { 961 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscScalar *x,*work,*w,*workt; 974 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 975 const PetscScalar *b; 976 PetscErrorCode ierr; 977 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 978 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 979 980 PetscFunctionBegin; 981 its = its*lits; 982 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 983 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 984 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 985 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 986 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 987 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 988 989 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 990 991 diag = a->diag; 992 idiag = a->idiag; 993 if (!a->mult_work) { 994 k = PetscMax(A->rmap->n,A->cmap->n); 995 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 996 } 997 work = a->mult_work; 998 if (!a->sor_work) { 999 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1000 } 1001 w = a->sor_work; 1002 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1005 1006 if (flag & SOR_ZERO_INITIAL_GUESS) { 1007 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1008 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1009 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1010 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1011 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1012 i2 = bs; 1013 idiag += bs2; 1014 for (i=1; i<m; i++) { 1015 v = aa + bs2*ai[i]; 1016 vi = aj + ai[i]; 1017 nz = diag[i] - ai[i]; 1018 1019 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1020 /* copy all rows of x that are needed into contiguous space */ 1021 workt = work; 1022 for (j=0; j<nz; j++) { 1023 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1024 workt += bs; 1025 } 1026 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1027 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1028 while (nz--) { 1029 idx = N*(*vi++); 1030 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1031 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1032 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1033 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1034 v += N2; 1035 } */ 1036 1037 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1038 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1039 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1040 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1041 1042 idiag += bs2; 1043 i2 += bs; 1044 } 1045 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1046 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1047 } 1048 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1049 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1050 i2 = 0; 1051 mdiag = a->idiag+bs2*a->mbs; 1052 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 for (i=0; i<m; i++) { 1054 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1055 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1056 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1057 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1058 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1059 1060 mdiag += bs2; 1061 i2 += bs; 1062 } 1063 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1064 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1065 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1066 } 1067 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 idiag = a->idiag+bs2*a->mbs - bs2; 1069 i2 = bs*m - bs; 1070 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1071 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1072 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1073 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1074 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1075 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1076 idiag -= bs2; 1077 i2 -= bs; 1078 for (i=m-2; i>=0; i--) { 1079 v = aa + bs2*(diag[i]+1); 1080 vi = aj + diag[i] + 1; 1081 nz = ai[i+1] - diag[i] - 1; 1082 1083 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1084 /* copy all rows of x that are needed into contiguous space */ 1085 workt = work; 1086 for (j=0; j<nz; j++) { 1087 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1088 workt += bs; 1089 } 1090 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1091 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1092 while (nz--) { 1093 idx = N*(*vi++); 1094 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1095 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1096 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1097 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1098 v += N2; 1099 } */ 1100 1101 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1102 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1103 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1104 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1105 idiag -= bs2; 1106 i2 -= bs; 1107 } 1108 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1109 } 1110 } else { 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1112 } 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* 1119 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1120 */ 1121 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1124 #define matsetvaluesblocked4_ matsetvaluesblocked4 1125 #endif 1126 1127 EXTERN_C_BEGIN 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "matsetvaluesblocked4_" 1130 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1131 { 1132 Mat A = *AA; 1133 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1134 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1135 PetscInt *ai=a->i,*ailen=a->ilen; 1136 PetscInt *aj=a->j,stepval,lastcol = -1; 1137 const PetscScalar *value = v; 1138 MatScalar *ap,*aa = a->a,*bap; 1139 1140 PetscFunctionBegin; 1141 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1142 stepval = (n-1)*4; 1143 for (k=0; k<m; k++) { /* loop over added rows */ 1144 row = im[k]; 1145 rp = aj + ai[row]; 1146 ap = aa + 16*ai[row]; 1147 nrow = ailen[row]; 1148 low = 0; 1149 high = nrow; 1150 for (l=0; l<n; l++) { /* loop over added columns */ 1151 col = in[l]; 1152 if (col <= lastcol) low = 0; else high = nrow; 1153 lastcol = col; 1154 value = v + k*(stepval+4 + l)*4; 1155 while (high-low > 7) { 1156 t = (low+high)/2; 1157 if (rp[t] > col) high = t; 1158 else low = t; 1159 } 1160 for (i=low; i<high; i++) { 1161 if (rp[i] > col) break; 1162 if (rp[i] == col) { 1163 bap = ap + 16*i; 1164 for (ii=0; ii<4; ii++,value+=stepval) { 1165 for (jj=ii; jj<16; jj+=4) { 1166 bap[jj] += *value++; 1167 } 1168 } 1169 goto noinsert2; 1170 } 1171 } 1172 N = nrow++ - 1; 1173 high++; /* added new column index thus must search to one higher than before */ 1174 /* shift up all the later entries in this row */ 1175 for (ii=N; ii>=i; ii--) { 1176 rp[ii+1] = rp[ii]; 1177 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1178 } 1179 if (N >= i) { 1180 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1181 } 1182 rp[i] = col; 1183 bap = ap + 16*i; 1184 for (ii=0; ii<4; ii++,value+=stepval) { 1185 for (jj=ii; jj<16; jj+=4) { 1186 bap[jj] = *value++; 1187 } 1188 } 1189 noinsert2:; 1190 low = i; 1191 } 1192 ailen[row] = nrow; 1193 } 1194 PetscFunctionReturnVoid(); 1195 } 1196 EXTERN_C_END 1197 1198 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1199 #define matsetvalues4_ MATSETVALUES4 1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1201 #define matsetvalues4_ matsetvalues4 1202 #endif 1203 1204 EXTERN_C_BEGIN 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSetValues4_" 1207 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1208 { 1209 Mat A = *AA; 1210 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1211 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1212 PetscInt *ai=a->i,*ailen=a->ilen; 1213 PetscInt *aj=a->j,brow,bcol; 1214 PetscInt ridx,cidx,lastcol = -1; 1215 MatScalar *ap,value,*aa=a->a,*bap; 1216 1217 PetscFunctionBegin; 1218 for (k=0; k<m; k++) { /* loop over added rows */ 1219 row = im[k]; brow = row/4; 1220 rp = aj + ai[brow]; 1221 ap = aa + 16*ai[brow]; 1222 nrow = ailen[brow]; 1223 low = 0; 1224 high = nrow; 1225 for (l=0; l<n; l++) { /* loop over added columns */ 1226 col = in[l]; bcol = col/4; 1227 ridx = row % 4; cidx = col % 4; 1228 value = v[l + k*n]; 1229 if (col <= lastcol) low = 0; else high = nrow; 1230 lastcol = col; 1231 while (high-low > 7) { 1232 t = (low+high)/2; 1233 if (rp[t] > bcol) high = t; 1234 else low = t; 1235 } 1236 for (i=low; i<high; i++) { 1237 if (rp[i] > bcol) break; 1238 if (rp[i] == bcol) { 1239 bap = ap + 16*i + 4*cidx + ridx; 1240 *bap += value; 1241 goto noinsert1; 1242 } 1243 } 1244 N = nrow++ - 1; 1245 high++; /* added new column thus must search to one higher than before */ 1246 /* shift up all the later entries in this row */ 1247 for (ii=N; ii>=i; ii--) { 1248 rp[ii+1] = rp[ii]; 1249 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1250 } 1251 if (N>=i) { 1252 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1253 } 1254 rp[i] = bcol; 1255 ap[16*i + 4*cidx + ridx] = value; 1256 noinsert1:; 1257 low = i; 1258 } 1259 ailen[brow] = nrow; 1260 } 1261 PetscFunctionReturnVoid(); 1262 } 1263 EXTERN_C_END 1264 1265 /* 1266 Checks for missing diagonals 1267 */ 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 PetscInt *diag,*jj = a->j,i; 1275 1276 PetscFunctionBegin; 1277 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1278 *missing = PETSC_FALSE; 1279 if (A->rmap->n > 0 && !jj) { 1280 *missing = PETSC_TRUE; 1281 if (d) *d = 0; 1282 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1283 } else { 1284 diag = a->diag; 1285 for (i=0; i<a->mbs; i++) { 1286 if (jj[diag[i]] != i) { 1287 *missing = PETSC_TRUE; 1288 if (d) *d = i; 1289 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1290 } 1291 } 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1301 PetscErrorCode ierr; 1302 PetscInt i,j,m = a->mbs; 1303 1304 PetscFunctionBegin; 1305 if (!a->diag) { 1306 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1307 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1308 a->free_diag = PETSC_TRUE; 1309 } 1310 for (i=0; i<m; i++) { 1311 a->diag[i] = a->i[i+1]; 1312 for (j=a->i[i]; j<a->i[i+1]; j++) { 1313 if (a->j[j] == i) { 1314 a->diag[i] = j; 1315 break; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1328 { 1329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1332 PetscInt *tia, *tja; 1333 1334 PetscFunctionBegin; 1335 *nn = n; 1336 if (!ia) PetscFunctionReturn(0); 1337 if (symmetric) { 1338 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1339 nz = tia[n]; 1340 } else { 1341 tia = a->i; tja = a->j; 1342 } 1343 1344 if (!blockcompressed && bs > 1) { 1345 (*nn) *= bs; 1346 /* malloc & create the natural set of indices */ 1347 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1348 if (n) { 1349 (*ia)[0] = 0; 1350 for (j=1; j<bs; j++) { 1351 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1352 } 1353 } 1354 1355 for (i=1; i<n; i++) { 1356 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1357 for (j=1; j<bs; j++) { 1358 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1359 } 1360 } 1361 if (n) { 1362 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1363 } 1364 1365 if (ja) { 1366 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1367 cnt = 0; 1368 for (i=0; i<n; i++) { 1369 for (j=0; j<bs; j++) { 1370 for (k=tia[i]; k<tia[i+1]; k++) { 1371 for (l=0; l<bs; l++) { 1372 (*ja)[cnt++] = bs*tja[k] + l; 1373 } 1374 } 1375 } 1376 } 1377 } 1378 1379 n *= bs; 1380 nz *= bs*bs; 1381 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1382 ierr = PetscFree(tia);CHKERRQ(ierr); 1383 ierr = PetscFree(tja);CHKERRQ(ierr); 1384 } 1385 } else if (oshift == 1) { 1386 if (symmetric) { 1387 PetscInt nz = tia[A->rmap->n/bs]; 1388 /* add 1 to i and j indices */ 1389 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1390 *ia = tia; 1391 if (ja) { 1392 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1393 *ja = tja; 1394 } 1395 } else { 1396 PetscInt nz = a->i[A->rmap->n/bs]; 1397 /* malloc space and add 1 to i and j indices */ 1398 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1399 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1400 if (ja) { 1401 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1402 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1403 } 1404 } 1405 } else { 1406 *ia = tia; 1407 if (ja) *ja = tja; 1408 } 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1416 { 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (!ia) PetscFunctionReturn(0); 1421 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1422 ierr = PetscFree(*ia);CHKERRQ(ierr); 1423 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 #if defined(PETSC_USE_LOG) 1437 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1438 #endif 1439 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1440 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1441 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1442 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1443 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1444 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1445 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1446 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1447 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1448 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1449 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1450 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1451 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1452 1453 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1454 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1455 ierr = PetscFree(a);CHKERRQ(ierr); 1456 1457 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1458 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1459 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1461 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1462 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1466 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1472 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1473 { 1474 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1475 PetscErrorCode ierr; 1476 1477 PetscFunctionBegin; 1478 switch (op) { 1479 case MAT_ROW_ORIENTED: 1480 a->roworiented = flg; 1481 break; 1482 case MAT_KEEP_NONZERO_PATTERN: 1483 a->keepnonzeropattern = flg; 1484 break; 1485 case MAT_NEW_NONZERO_LOCATIONS: 1486 a->nonew = (flg ? 0 : 1); 1487 break; 1488 case MAT_NEW_NONZERO_LOCATION_ERR: 1489 a->nonew = (flg ? -1 : 0); 1490 break; 1491 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1492 a->nonew = (flg ? -2 : 0); 1493 break; 1494 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1495 a->nounused = (flg ? -1 : 0); 1496 break; 1497 case MAT_CHECK_COMPRESSED_ROW: 1498 a->compressedrow.check = flg; 1499 break; 1500 case MAT_NEW_DIAGONALS: 1501 case MAT_IGNORE_OFF_PROC_ENTRIES: 1502 case MAT_USE_HASH_TABLE: 1503 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1504 break; 1505 case MAT_SYMMETRIC: 1506 case MAT_STRUCTURALLY_SYMMETRIC: 1507 case MAT_HERMITIAN: 1508 case MAT_SYMMETRY_ETERNAL: 1509 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1510 break; 1511 default: 1512 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1513 } 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1519 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1520 { 1521 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1522 PetscErrorCode ierr; 1523 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1524 MatScalar *aa,*aa_i; 1525 PetscScalar *v_i; 1526 1527 PetscFunctionBegin; 1528 bs = A->rmap->bs; 1529 ai = a->i; 1530 aj = a->j; 1531 aa = a->a; 1532 bs2 = a->bs2; 1533 1534 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1535 1536 bn = row/bs; /* Block number */ 1537 bp = row % bs; /* Block Position */ 1538 M = ai[bn+1] - ai[bn]; 1539 *nz = bs*M; 1540 1541 if (v) { 1542 *v = 0; 1543 if (*nz) { 1544 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1545 for (i=0; i<M; i++) { /* for each block in the block row */ 1546 v_i = *v + i*bs; 1547 aa_i = aa + bs2*(ai[bn] + i); 1548 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1549 } 1550 } 1551 } 1552 1553 if (idx) { 1554 *idx = 0; 1555 if (*nz) { 1556 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1557 for (i=0; i<M; i++) { /* for each block in the block row */ 1558 idx_i = *idx + i*bs; 1559 itmp = bs*aj[ai[bn] + i]; 1560 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1561 } 1562 } 1563 } 1564 PetscFunctionReturn(0); 1565 } 1566 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1569 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1570 { 1571 PetscErrorCode ierr; 1572 1573 PetscFunctionBegin; 1574 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1575 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1576 PetscFunctionReturn(0); 1577 } 1578 1579 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1583 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1584 { 1585 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1586 Mat C; 1587 PetscErrorCode ierr; 1588 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1589 PetscInt *rows,*cols,bs2=a->bs2; 1590 MatScalar *array; 1591 1592 PetscFunctionBegin; 1593 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1595 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1596 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1597 1598 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1599 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1600 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1601 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1602 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1603 ierr = PetscFree(col);CHKERRQ(ierr); 1604 } else { 1605 C = *B; 1606 } 1607 1608 array = a->a; 1609 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1610 for (i=0; i<mbs; i++) { 1611 cols[0] = i*bs; 1612 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1613 len = ai[i+1] - ai[i]; 1614 for (j=0; j<len; j++) { 1615 rows[0] = (*aj++)*bs; 1616 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1617 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1618 array += bs2; 1619 } 1620 } 1621 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1622 1623 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1624 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1625 1626 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1627 *B = C; 1628 } else { 1629 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1630 } 1631 PetscFunctionReturn(0); 1632 } 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1636 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1637 { 1638 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1639 PetscErrorCode ierr; 1640 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1641 int fd; 1642 PetscScalar *aa; 1643 FILE *file; 1644 1645 PetscFunctionBegin; 1646 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1647 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1648 col_lens[0] = MAT_FILE_CLASSID; 1649 1650 col_lens[1] = A->rmap->N; 1651 col_lens[2] = A->cmap->n; 1652 col_lens[3] = a->nz*bs2; 1653 1654 /* store lengths of each row and write (including header) to file */ 1655 count = 0; 1656 for (i=0; i<a->mbs; i++) { 1657 for (j=0; j<bs; j++) { 1658 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1659 } 1660 } 1661 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1662 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1663 1664 /* store column indices (zero start index) */ 1665 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1666 count = 0; 1667 for (i=0; i<a->mbs; i++) { 1668 for (j=0; j<bs; j++) { 1669 for (k=a->i[i]; k<a->i[i+1]; k++) { 1670 for (l=0; l<bs; l++) { 1671 jj[count++] = bs*a->j[k] + l; 1672 } 1673 } 1674 } 1675 } 1676 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1677 ierr = PetscFree(jj);CHKERRQ(ierr); 1678 1679 /* store nonzero values */ 1680 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1681 count = 0; 1682 for (i=0; i<a->mbs; i++) { 1683 for (j=0; j<bs; j++) { 1684 for (k=a->i[i]; k<a->i[i+1]; k++) { 1685 for (l=0; l<bs; l++) { 1686 aa[count++] = a->a[bs2*k + l*bs + j]; 1687 } 1688 } 1689 } 1690 } 1691 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1692 ierr = PetscFree(aa);CHKERRQ(ierr); 1693 1694 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1695 if (file) { 1696 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1703 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1704 { 1705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1706 PetscErrorCode ierr; 1707 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1708 PetscViewerFormat format; 1709 1710 PetscFunctionBegin; 1711 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1712 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1713 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1714 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1715 Mat aij; 1716 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1717 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1718 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1719 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1720 PetscFunctionReturn(0); 1721 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1722 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1723 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1724 for (i=0; i<a->mbs; i++) { 1725 for (j=0; j<bs; j++) { 1726 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1727 for (k=a->i[i]; k<a->i[i+1]; k++) { 1728 for (l=0; l<bs; l++) { 1729 #if defined(PETSC_USE_COMPLEX) 1730 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1731 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1732 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1733 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1734 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1735 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1736 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1737 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1738 } 1739 #else 1740 if (a->a[bs2*k + l*bs + j] != 0.0) { 1741 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1742 } 1743 #endif 1744 } 1745 } 1746 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1747 } 1748 } 1749 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1750 } else { 1751 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1752 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1753 for (i=0; i<a->mbs; i++) { 1754 for (j=0; j<bs; j++) { 1755 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1756 for (k=a->i[i]; k<a->i[i+1]; k++) { 1757 for (l=0; l<bs; l++) { 1758 #if defined(PETSC_USE_COMPLEX) 1759 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1760 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1761 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1762 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1763 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1764 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1765 } else { 1766 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1767 } 1768 #else 1769 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1770 #endif 1771 } 1772 } 1773 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1774 } 1775 } 1776 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1777 } 1778 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1784 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1785 { 1786 Mat A = (Mat) Aa; 1787 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1788 PetscErrorCode ierr; 1789 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1790 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1791 MatScalar *aa; 1792 PetscViewer viewer; 1793 1794 PetscFunctionBegin; 1795 1796 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1797 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1798 1799 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1800 1801 /* loop over matrix elements drawing boxes */ 1802 color = PETSC_DRAW_BLUE; 1803 for (i=0,row=0; i<mbs; i++,row+=bs) { 1804 for (j=a->i[i]; j<a->i[i+1]; j++) { 1805 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1806 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1807 aa = a->a + j*bs2; 1808 for (k=0; k<bs; k++) { 1809 for (l=0; l<bs; l++) { 1810 if (PetscRealPart(*aa++) >= 0.) continue; 1811 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1812 } 1813 } 1814 } 1815 } 1816 color = PETSC_DRAW_CYAN; 1817 for (i=0,row=0; i<mbs; i++,row+=bs) { 1818 for (j=a->i[i]; j<a->i[i+1]; j++) { 1819 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1820 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1821 aa = a->a + j*bs2; 1822 for (k=0; k<bs; k++) { 1823 for (l=0; l<bs; l++) { 1824 if (PetscRealPart(*aa++) != 0.) continue; 1825 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1826 } 1827 } 1828 } 1829 } 1830 1831 color = PETSC_DRAW_RED; 1832 for (i=0,row=0; i<mbs; i++,row+=bs) { 1833 for (j=a->i[i]; j<a->i[i+1]; j++) { 1834 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1835 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1836 aa = a->a + j*bs2; 1837 for (k=0; k<bs; k++) { 1838 for (l=0; l<bs; l++) { 1839 if (PetscRealPart(*aa++) <= 0.) continue; 1840 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1841 } 1842 } 1843 } 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1850 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1851 { 1852 PetscErrorCode ierr; 1853 PetscReal xl,yl,xr,yr,w,h; 1854 PetscDraw draw; 1855 PetscBool isnull; 1856 1857 PetscFunctionBegin; 1858 1859 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1860 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1861 1862 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1863 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1864 xr += w; yr += h; xl = -w; yl = -h; 1865 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1866 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1867 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatView_SeqBAIJ" 1873 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1874 { 1875 PetscErrorCode ierr; 1876 PetscBool iascii,isbinary,isdraw; 1877 1878 PetscFunctionBegin; 1879 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1880 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1881 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1882 if (iascii){ 1883 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1884 } else if (isbinary) { 1885 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1886 } else if (isdraw) { 1887 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1888 } else { 1889 Mat B; 1890 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1891 ierr = MatView(B,viewer);CHKERRQ(ierr); 1892 ierr = MatDestroy(&B);CHKERRQ(ierr); 1893 } 1894 PetscFunctionReturn(0); 1895 } 1896 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1900 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1901 { 1902 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1903 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1904 PetscInt *ai = a->i,*ailen = a->ilen; 1905 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1906 MatScalar *ap,*aa = a->a; 1907 1908 PetscFunctionBegin; 1909 for (k=0; k<m; k++) { /* loop over rows */ 1910 row = im[k]; brow = row/bs; 1911 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1912 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1913 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1914 nrow = ailen[brow]; 1915 for (l=0; l<n; l++) { /* loop over columns */ 1916 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1917 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1918 col = in[l] ; 1919 bcol = col/bs; 1920 cidx = col%bs; 1921 ridx = row%bs; 1922 high = nrow; 1923 low = 0; /* assume unsorted */ 1924 while (high-low > 5) { 1925 t = (low+high)/2; 1926 if (rp[t] > bcol) high = t; 1927 else low = t; 1928 } 1929 for (i=low; i<high; i++) { 1930 if (rp[i] > bcol) break; 1931 if (rp[i] == bcol) { 1932 *v++ = ap[bs2*i+bs*cidx+ridx]; 1933 goto finished; 1934 } 1935 } 1936 *v++ = 0.0; 1937 finished:; 1938 } 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1945 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1946 { 1947 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1948 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1949 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1950 PetscErrorCode ierr; 1951 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1952 PetscBool roworiented=a->roworiented; 1953 const PetscScalar *value = v; 1954 MatScalar *ap,*aa = a->a,*bap; 1955 1956 PetscFunctionBegin; 1957 if (roworiented) { 1958 stepval = (n-1)*bs; 1959 } else { 1960 stepval = (m-1)*bs; 1961 } 1962 for (k=0; k<m; k++) { /* loop over added rows */ 1963 row = im[k]; 1964 if (row < 0) continue; 1965 #if defined(PETSC_USE_DEBUG) 1966 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1967 #endif 1968 rp = aj + ai[row]; 1969 ap = aa + bs2*ai[row]; 1970 rmax = imax[row]; 1971 nrow = ailen[row]; 1972 low = 0; 1973 high = nrow; 1974 for (l=0; l<n; l++) { /* loop over added columns */ 1975 if (in[l] < 0) continue; 1976 #if defined(PETSC_USE_DEBUG) 1977 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1978 #endif 1979 col = in[l]; 1980 if (roworiented) { 1981 value = v + (k*(stepval+bs) + l)*bs; 1982 } else { 1983 value = v + (l*(stepval+bs) + k)*bs; 1984 } 1985 if (col <= lastcol) low = 0; else high = nrow; 1986 lastcol = col; 1987 while (high-low > 7) { 1988 t = (low+high)/2; 1989 if (rp[t] > col) high = t; 1990 else low = t; 1991 } 1992 for (i=low; i<high; i++) { 1993 if (rp[i] > col) break; 1994 if (rp[i] == col) { 1995 bap = ap + bs2*i; 1996 if (roworiented) { 1997 if (is == ADD_VALUES) { 1998 for (ii=0; ii<bs; ii++,value+=stepval) { 1999 for (jj=ii; jj<bs2; jj+=bs) { 2000 bap[jj] += *value++; 2001 } 2002 } 2003 } else { 2004 for (ii=0; ii<bs; ii++,value+=stepval) { 2005 for (jj=ii; jj<bs2; jj+=bs) { 2006 bap[jj] = *value++; 2007 } 2008 } 2009 } 2010 } else { 2011 if (is == ADD_VALUES) { 2012 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2013 for (jj=0; jj<bs; jj++) { 2014 bap[jj] += value[jj]; 2015 } 2016 bap += bs; 2017 } 2018 } else { 2019 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2020 for (jj=0; jj<bs; jj++) { 2021 bap[jj] = value[jj]; 2022 } 2023 bap += bs; 2024 } 2025 } 2026 } 2027 goto noinsert2; 2028 } 2029 } 2030 if (nonew == 1) goto noinsert2; 2031 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2032 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2033 N = nrow++ - 1; high++; 2034 /* shift up all the later entries in this row */ 2035 for (ii=N; ii>=i; ii--) { 2036 rp[ii+1] = rp[ii]; 2037 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2038 } 2039 if (N >= i) { 2040 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2041 } 2042 rp[i] = col; 2043 bap = ap + bs2*i; 2044 if (roworiented) { 2045 for (ii=0; ii<bs; ii++,value+=stepval) { 2046 for (jj=ii; jj<bs2; jj+=bs) { 2047 bap[jj] = *value++; 2048 } 2049 } 2050 } else { 2051 for (ii=0; ii<bs; ii++,value+=stepval) { 2052 for (jj=0; jj<bs; jj++) { 2053 *bap++ = *value++; 2054 } 2055 } 2056 } 2057 noinsert2:; 2058 low = i; 2059 } 2060 ailen[row] = nrow; 2061 } 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2067 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2068 { 2069 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2070 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2071 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2072 PetscErrorCode ierr; 2073 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2074 MatScalar *aa = a->a,*ap; 2075 PetscReal ratio=0.6; 2076 2077 PetscFunctionBegin; 2078 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2079 2080 if (m) rmax = ailen[0]; 2081 for (i=1; i<mbs; i++) { 2082 /* move each row back by the amount of empty slots (fshift) before it*/ 2083 fshift += imax[i-1] - ailen[i-1]; 2084 rmax = PetscMax(rmax,ailen[i]); 2085 if (fshift) { 2086 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2087 N = ailen[i]; 2088 for (j=0; j<N; j++) { 2089 ip[j-fshift] = ip[j]; 2090 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2091 } 2092 } 2093 ai[i] = ai[i-1] + ailen[i-1]; 2094 } 2095 if (mbs) { 2096 fshift += imax[mbs-1] - ailen[mbs-1]; 2097 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2098 } 2099 /* reset ilen and imax for each row */ 2100 for (i=0; i<mbs; i++) { 2101 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2102 } 2103 a->nz = ai[mbs]; 2104 2105 /* diagonals may have moved, so kill the diagonal pointers */ 2106 a->idiagvalid = PETSC_FALSE; 2107 if (fshift && a->diag) { 2108 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2109 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2110 a->diag = 0; 2111 } 2112 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2113 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2114 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2115 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2116 A->info.mallocs += a->reallocs; 2117 a->reallocs = 0; 2118 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2119 2120 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2121 A->same_nonzero = PETSC_TRUE; 2122 PetscFunctionReturn(0); 2123 } 2124 2125 /* 2126 This function returns an array of flags which indicate the locations of contiguous 2127 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2128 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2129 Assume: sizes should be long enough to hold all the values. 2130 */ 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2133 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2134 { 2135 PetscInt i,j,k,row; 2136 PetscBool flg; 2137 2138 PetscFunctionBegin; 2139 for (i=0,j=0; i<n; j++) { 2140 row = idx[i]; 2141 if (row%bs!=0) { /* Not the begining of a block */ 2142 sizes[j] = 1; 2143 i++; 2144 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2145 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2146 i++; 2147 } else { /* Begining of the block, so check if the complete block exists */ 2148 flg = PETSC_TRUE; 2149 for (k=1; k<bs; k++) { 2150 if (row+k != idx[i+k]) { /* break in the block */ 2151 flg = PETSC_FALSE; 2152 break; 2153 } 2154 } 2155 if (flg) { /* No break in the bs */ 2156 sizes[j] = bs; 2157 i+= bs; 2158 } else { 2159 sizes[j] = 1; 2160 i++; 2161 } 2162 } 2163 } 2164 *bs_max = j; 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2170 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2171 { 2172 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2173 PetscErrorCode ierr; 2174 PetscInt i,j,k,count,*rows; 2175 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2176 PetscScalar zero = 0.0; 2177 MatScalar *aa; 2178 const PetscScalar *xx; 2179 PetscScalar *bb; 2180 2181 PetscFunctionBegin; 2182 /* fix right hand side if needed */ 2183 if (x && b) { 2184 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2185 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2186 for (i=0; i<is_n; i++) { 2187 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2188 } 2189 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2190 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2191 } 2192 2193 /* Make a copy of the IS and sort it */ 2194 /* allocate memory for rows,sizes */ 2195 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2196 2197 /* copy IS values to rows, and sort them */ 2198 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2199 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2200 2201 if (baij->keepnonzeropattern) { 2202 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2203 bs_max = is_n; 2204 A->same_nonzero = PETSC_TRUE; 2205 } else { 2206 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2207 A->same_nonzero = PETSC_FALSE; 2208 } 2209 2210 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2211 row = rows[j]; 2212 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2213 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2214 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2215 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2216 if (diag != 0.0) { 2217 if (baij->ilen[row/bs] > 0) { 2218 baij->ilen[row/bs] = 1; 2219 baij->j[baij->i[row/bs]] = row/bs; 2220 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2221 } 2222 /* Now insert all the diagonal values for this bs */ 2223 for (k=0; k<bs; k++) { 2224 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2225 } 2226 } else { /* (diag == 0.0) */ 2227 baij->ilen[row/bs] = 0; 2228 } /* end (diag == 0.0) */ 2229 } else { /* (sizes[i] != bs) */ 2230 #if defined (PETSC_USE_DEBUG) 2231 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2232 #endif 2233 for (k=0; k<count; k++) { 2234 aa[0] = zero; 2235 aa += bs; 2236 } 2237 if (diag != 0.0) { 2238 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2239 } 2240 } 2241 } 2242 2243 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2244 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2250 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2251 { 2252 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2253 PetscErrorCode ierr; 2254 PetscInt i,j,k,count; 2255 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2256 PetscScalar zero = 0.0; 2257 MatScalar *aa; 2258 const PetscScalar *xx; 2259 PetscScalar *bb; 2260 PetscBool *zeroed,vecs = PETSC_FALSE; 2261 2262 PetscFunctionBegin; 2263 /* fix right hand side if needed */ 2264 if (x && b) { 2265 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2266 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2267 vecs = PETSC_TRUE; 2268 } 2269 A->same_nonzero = PETSC_TRUE; 2270 2271 /* zero the columns */ 2272 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2273 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2274 for (i=0; i<is_n; i++) { 2275 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2276 zeroed[is_idx[i]] = PETSC_TRUE; 2277 } 2278 for (i=0; i<A->rmap->N; i++) { 2279 if (!zeroed[i]) { 2280 row = i/bs; 2281 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2282 for (k=0; k<bs; k++) { 2283 col = bs*baij->j[j] + k; 2284 if (zeroed[col]) { 2285 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2286 if (vecs) bb[i] -= aa[0]*xx[col]; 2287 aa[0] = 0.0; 2288 } 2289 } 2290 } 2291 } else if (vecs) bb[i] = diag*xx[i]; 2292 } 2293 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2294 if (vecs) { 2295 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2296 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2297 } 2298 2299 /* zero the rows */ 2300 for (i=0; i<is_n; i++) { 2301 row = is_idx[i]; 2302 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2303 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2304 for (k=0; k<count; k++) { 2305 aa[0] = zero; 2306 aa += bs; 2307 } 2308 if (diag != 0.0) { 2309 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2310 } 2311 } 2312 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 #undef __FUNCT__ 2317 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2318 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2319 { 2320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2321 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2322 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2323 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2324 PetscErrorCode ierr; 2325 PetscInt ridx,cidx,bs2=a->bs2; 2326 PetscBool roworiented=a->roworiented; 2327 MatScalar *ap,value,*aa=a->a,*bap; 2328 2329 PetscFunctionBegin; 2330 if (v) PetscValidScalarPointer(v,6); 2331 for (k=0; k<m; k++) { /* loop over added rows */ 2332 row = im[k]; 2333 brow = row/bs; 2334 if (row < 0) continue; 2335 #if defined(PETSC_USE_DEBUG) 2336 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2337 #endif 2338 rp = aj + ai[brow]; 2339 ap = aa + bs2*ai[brow]; 2340 rmax = imax[brow]; 2341 nrow = ailen[brow]; 2342 low = 0; 2343 high = nrow; 2344 for (l=0; l<n; l++) { /* loop over added columns */ 2345 if (in[l] < 0) continue; 2346 #if defined(PETSC_USE_DEBUG) 2347 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2348 #endif 2349 col = in[l]; bcol = col/bs; 2350 ridx = row % bs; cidx = col % bs; 2351 if (roworiented) { 2352 value = v[l + k*n]; 2353 } else { 2354 value = v[k + l*m]; 2355 } 2356 if (col <= lastcol) low = 0; else high = nrow; 2357 lastcol = col; 2358 while (high-low > 7) { 2359 t = (low+high)/2; 2360 if (rp[t] > bcol) high = t; 2361 else low = t; 2362 } 2363 for (i=low; i<high; i++) { 2364 if (rp[i] > bcol) break; 2365 if (rp[i] == bcol) { 2366 bap = ap + bs2*i + bs*cidx + ridx; 2367 if (is == ADD_VALUES) *bap += value; 2368 else *bap = value; 2369 goto noinsert1; 2370 } 2371 } 2372 if (nonew == 1) goto noinsert1; 2373 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2374 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2375 N = nrow++ - 1; high++; 2376 /* shift up all the later entries in this row */ 2377 for (ii=N; ii>=i; ii--) { 2378 rp[ii+1] = rp[ii]; 2379 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2380 } 2381 if (N>=i) { 2382 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2383 } 2384 rp[i] = bcol; 2385 ap[bs2*i + bs*cidx + ridx] = value; 2386 a->nz++; 2387 noinsert1:; 2388 low = i; 2389 } 2390 ailen[brow] = nrow; 2391 } 2392 A->same_nonzero = PETSC_FALSE; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2398 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2399 { 2400 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2401 Mat outA; 2402 PetscErrorCode ierr; 2403 PetscBool row_identity,col_identity; 2404 2405 PetscFunctionBegin; 2406 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2407 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2408 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2409 if (!row_identity || !col_identity) { 2410 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2411 } 2412 2413 outA = inA; 2414 inA->factortype = MAT_FACTOR_LU; 2415 2416 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2417 2418 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2419 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2420 a->row = row; 2421 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2422 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2423 a->col = col; 2424 2425 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2426 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2427 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2428 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2429 2430 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2431 if (!a->solve_work) { 2432 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2433 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2434 } 2435 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2436 2437 PetscFunctionReturn(0); 2438 } 2439 2440 EXTERN_C_BEGIN 2441 #undef __FUNCT__ 2442 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2443 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2444 { 2445 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2446 PetscInt i,nz,mbs; 2447 2448 PetscFunctionBegin; 2449 nz = baij->maxnz; 2450 mbs = baij->mbs; 2451 for (i=0; i<nz; i++) { 2452 baij->j[i] = indices[i]; 2453 } 2454 baij->nz = nz; 2455 for (i=0; i<mbs; i++) { 2456 baij->ilen[i] = baij->imax[i]; 2457 } 2458 PetscFunctionReturn(0); 2459 } 2460 EXTERN_C_END 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2464 /*@ 2465 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2466 in the matrix. 2467 2468 Input Parameters: 2469 + mat - the SeqBAIJ matrix 2470 - indices - the column indices 2471 2472 Level: advanced 2473 2474 Notes: 2475 This can be called if you have precomputed the nonzero structure of the 2476 matrix and want to provide it to the matrix object to improve the performance 2477 of the MatSetValues() operation. 2478 2479 You MUST have set the correct numbers of nonzeros per row in the call to 2480 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2481 2482 MUST be called before any calls to MatSetValues(); 2483 2484 @*/ 2485 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2486 { 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2491 PetscValidPointer(indices,2); 2492 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2498 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2499 { 2500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2501 PetscErrorCode ierr; 2502 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2503 PetscReal atmp; 2504 PetscScalar *x,zero = 0.0; 2505 MatScalar *aa; 2506 PetscInt ncols,brow,krow,kcol; 2507 2508 PetscFunctionBegin; 2509 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2510 bs = A->rmap->bs; 2511 aa = a->a; 2512 ai = a->i; 2513 aj = a->j; 2514 mbs = a->mbs; 2515 2516 ierr = VecSet(v,zero);CHKERRQ(ierr); 2517 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2518 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2519 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2520 for (i=0; i<mbs; i++) { 2521 ncols = ai[1] - ai[0]; ai++; 2522 brow = bs*i; 2523 for (j=0; j<ncols; j++){ 2524 for (kcol=0; kcol<bs; kcol++){ 2525 for (krow=0; krow<bs; krow++){ 2526 atmp = PetscAbsScalar(*aa);aa++; 2527 row = brow + krow; /* row index */ 2528 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2529 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2530 } 2531 } 2532 aj++; 2533 } 2534 } 2535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatCopy_SeqBAIJ" 2541 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2542 { 2543 PetscErrorCode ierr; 2544 2545 PetscFunctionBegin; 2546 /* If the two matrices have the same copy implementation, use fast copy. */ 2547 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2548 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2549 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2550 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2551 2552 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2553 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2554 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2555 } else { 2556 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2563 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2564 { 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2569 PetscFunctionReturn(0); 2570 } 2571 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2574 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2575 { 2576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2577 PetscFunctionBegin; 2578 *array = a->a; 2579 PetscFunctionReturn(0); 2580 } 2581 2582 #undef __FUNCT__ 2583 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2584 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2585 { 2586 PetscFunctionBegin; 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2592 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2593 { 2594 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2595 PetscErrorCode ierr; 2596 PetscInt i,bs=Y->rmap->bs,j,bs2; 2597 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2598 2599 PetscFunctionBegin; 2600 if (str == SAME_NONZERO_PATTERN) { 2601 PetscScalar alpha = a; 2602 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2603 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2604 if (y->xtoy && y->XtoY != X) { 2605 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2606 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2607 } 2608 if (!y->xtoy) { /* get xtoy */ 2609 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2610 y->XtoY = X; 2611 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2612 } 2613 bs2 = bs*bs; 2614 for (i=0; i<x->nz; i++) { 2615 j = 0; 2616 while (j < bs2){ 2617 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2618 j++; 2619 } 2620 } 2621 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2622 } else { 2623 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2624 } 2625 PetscFunctionReturn(0); 2626 } 2627 2628 #undef __FUNCT__ 2629 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2630 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2631 { 2632 PetscInt rbs,cbs; 2633 PetscErrorCode ierr; 2634 2635 PetscFunctionBegin; 2636 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2637 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2638 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2639 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2640 PetscFunctionReturn(0); 2641 } 2642 2643 #undef __FUNCT__ 2644 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2645 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2646 { 2647 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2648 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2649 MatScalar *aa = a->a; 2650 2651 PetscFunctionBegin; 2652 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 #undef __FUNCT__ 2657 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2658 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2659 { 2660 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2661 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2662 MatScalar *aa = a->a; 2663 2664 PetscFunctionBegin; 2665 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2666 PetscFunctionReturn(0); 2667 } 2668 2669 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2670 2671 #undef __FUNCT__ 2672 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2673 /* 2674 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2675 */ 2676 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2677 { 2678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2679 PetscErrorCode ierr; 2680 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2681 PetscInt nz = a->i[m],row,*jj,mr,col; 2682 2683 PetscFunctionBegin; 2684 *nn = n; 2685 if (!ia) PetscFunctionReturn(0); 2686 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2687 else { 2688 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2689 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2690 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2691 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2692 jj = a->j; 2693 for (i=0; i<nz; i++) { 2694 collengths[jj[i]]++; 2695 } 2696 cia[0] = oshift; 2697 for (i=0; i<n; i++) { 2698 cia[i+1] = cia[i] + collengths[i]; 2699 } 2700 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2701 jj = a->j; 2702 for (row=0; row<m; row++) { 2703 mr = a->i[row+1] - a->i[row]; 2704 for (i=0; i<mr; i++) { 2705 col = *jj++; 2706 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2707 } 2708 } 2709 ierr = PetscFree(collengths);CHKERRQ(ierr); 2710 *ia = cia; *ja = cja; 2711 } 2712 PetscFunctionReturn(0); 2713 } 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2717 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2718 { 2719 PetscErrorCode ierr; 2720 2721 PetscFunctionBegin; 2722 if (!ia) PetscFunctionReturn(0); 2723 ierr = PetscFree(*ia);CHKERRQ(ierr); 2724 ierr = PetscFree(*ja);CHKERRQ(ierr); 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2730 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2731 { 2732 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2733 PetscErrorCode ierr; 2734 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2735 PetscScalar dx,*y,*xx,*w3_array; 2736 PetscScalar *vscale_array; 2737 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2738 Vec w1=coloring->w1,w2=coloring->w2,w3; 2739 void *fctx = coloring->fctx; 2740 PetscBool flg = PETSC_FALSE; 2741 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2742 Vec x1_tmp; 2743 2744 PetscFunctionBegin; 2745 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2746 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2747 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2748 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2749 2750 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2751 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2752 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2753 if (flg) { 2754 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2755 } else { 2756 PetscBool assembled; 2757 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2758 if (assembled) { 2759 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2760 } 2761 } 2762 2763 x1_tmp = x1; 2764 if (!coloring->vscale){ 2765 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2766 } 2767 2768 /* 2769 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2770 coloring->F for the coarser grids from the finest 2771 */ 2772 if (coloring->F) { 2773 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2774 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2775 if (m1 != m2) { 2776 coloring->F = 0; 2777 } 2778 } 2779 2780 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2781 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2782 } 2783 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2784 2785 /* Set w1 = F(x1) */ 2786 if (coloring->F) { 2787 w1 = coloring->F; /* use already computed value of function */ 2788 coloring->F = 0; 2789 } else { 2790 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2791 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2792 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2793 } 2794 2795 if (!coloring->w3) { 2796 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2798 } 2799 w3 = coloring->w3; 2800 2801 CHKMEMQ; 2802 /* Compute all the local scale factors, including ghost points */ 2803 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2804 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2805 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2806 if (ctype == IS_COLORING_GHOSTED){ 2807 col_start = 0; col_end = N; 2808 } else if (ctype == IS_COLORING_GLOBAL){ 2809 xx = xx - start; 2810 vscale_array = vscale_array - start; 2811 col_start = start; col_end = N + start; 2812 } CHKMEMQ; 2813 for (col=col_start; col<col_end; col++){ 2814 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2815 if (coloring->htype[0] == 'w') { 2816 dx = 1.0 + unorm; 2817 } else { 2818 dx = xx[col]; 2819 } 2820 if (dx == 0.0) dx = 1.0; 2821 #if !defined(PETSC_USE_COMPLEX) 2822 if (dx < umin && dx >= 0.0) dx = umin; 2823 else if (dx < 0.0 && dx > -umin) dx = -umin; 2824 #else 2825 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2826 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2827 #endif 2828 dx *= epsilon; 2829 vscale_array[col] = 1.0/dx; 2830 } CHKMEMQ; 2831 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2832 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2833 if (ctype == IS_COLORING_GLOBAL){ 2834 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2835 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2836 } 2837 CHKMEMQ; 2838 if (coloring->vscaleforrow) { 2839 vscaleforrow = coloring->vscaleforrow; 2840 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2841 2842 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2843 /* 2844 Loop over each color 2845 */ 2846 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2847 for (k=0; k<coloring->ncolors; k++) { 2848 coloring->currentcolor = k; 2849 for (i=0; i<bs; i++) { 2850 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2851 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2852 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2853 /* 2854 Loop over each column associated with color 2855 adding the perturbation to the vector w3. 2856 */ 2857 for (l=0; l<coloring->ncolumns[k]; l++) { 2858 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2859 if (coloring->htype[0] == 'w') { 2860 dx = 1.0 + unorm; 2861 } else { 2862 dx = xx[col]; 2863 } 2864 if (dx == 0.0) dx = 1.0; 2865 #if !defined(PETSC_USE_COMPLEX) 2866 if (dx < umin && dx >= 0.0) dx = umin; 2867 else if (dx < 0.0 && dx > -umin) dx = -umin; 2868 #else 2869 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2870 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2871 #endif 2872 dx *= epsilon; 2873 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2874 w3_array[col] += dx; 2875 } 2876 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2877 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2878 2879 /* 2880 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2881 w2 = F(x1 + dx) - F(x1) 2882 */ 2883 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2884 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2885 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2886 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2887 2888 /* 2889 Loop over rows of vector, putting results into Jacobian matrix 2890 */ 2891 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2892 for (l=0; l<coloring->nrows[k]; l++) { 2893 row = bs*coloring->rows[k][l]; /* local row index */ 2894 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2895 for (j=0; j<bs; j++) { 2896 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2897 srows[j] = row + start + j; 2898 } 2899 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2900 } 2901 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2902 } 2903 } /* endof for each color */ 2904 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2905 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2906 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2907 ierr = PetscFree(srows);CHKERRQ(ierr); 2908 2909 coloring->currentcolor = -1; 2910 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2911 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2912 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2913 PetscFunctionReturn(0); 2914 } 2915 2916 /* -------------------------------------------------------------------*/ 2917 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2918 MatGetRow_SeqBAIJ, 2919 MatRestoreRow_SeqBAIJ, 2920 MatMult_SeqBAIJ_N, 2921 /* 4*/ MatMultAdd_SeqBAIJ_N, 2922 MatMultTranspose_SeqBAIJ, 2923 MatMultTransposeAdd_SeqBAIJ, 2924 0, 2925 0, 2926 0, 2927 /*10*/ 0, 2928 MatLUFactor_SeqBAIJ, 2929 0, 2930 0, 2931 MatTranspose_SeqBAIJ, 2932 /*15*/ MatGetInfo_SeqBAIJ, 2933 MatEqual_SeqBAIJ, 2934 MatGetDiagonal_SeqBAIJ, 2935 MatDiagonalScale_SeqBAIJ, 2936 MatNorm_SeqBAIJ, 2937 /*20*/ 0, 2938 MatAssemblyEnd_SeqBAIJ, 2939 MatSetOption_SeqBAIJ, 2940 MatZeroEntries_SeqBAIJ, 2941 /*24*/ MatZeroRows_SeqBAIJ, 2942 0, 2943 0, 2944 0, 2945 0, 2946 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2947 0, 2948 0, 2949 MatGetArray_SeqBAIJ, 2950 MatRestoreArray_SeqBAIJ, 2951 /*34*/ MatDuplicate_SeqBAIJ, 2952 0, 2953 0, 2954 MatILUFactor_SeqBAIJ, 2955 0, 2956 /*39*/ MatAXPY_SeqBAIJ, 2957 MatGetSubMatrices_SeqBAIJ, 2958 MatIncreaseOverlap_SeqBAIJ, 2959 MatGetValues_SeqBAIJ, 2960 MatCopy_SeqBAIJ, 2961 /*44*/ 0, 2962 MatScale_SeqBAIJ, 2963 0, 2964 0, 2965 MatZeroRowsColumns_SeqBAIJ, 2966 /*49*/ MatSetBlockSize_SeqBAIJ, 2967 MatGetRowIJ_SeqBAIJ, 2968 MatRestoreRowIJ_SeqBAIJ, 2969 MatGetColumnIJ_SeqBAIJ, 2970 MatRestoreColumnIJ_SeqBAIJ, 2971 /*54*/ MatFDColoringCreate_SeqAIJ, 2972 0, 2973 0, 2974 0, 2975 MatSetValuesBlocked_SeqBAIJ, 2976 /*59*/ MatGetSubMatrix_SeqBAIJ, 2977 MatDestroy_SeqBAIJ, 2978 MatView_SeqBAIJ, 2979 0, 2980 0, 2981 /*64*/ 0, 2982 0, 2983 0, 2984 0, 2985 0, 2986 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2987 0, 2988 MatConvert_Basic, 2989 0, 2990 0, 2991 /*74*/ 0, 2992 MatFDColoringApply_BAIJ, 2993 0, 2994 0, 2995 0, 2996 /*79*/ 0, 2997 0, 2998 0, 2999 0, 3000 MatLoad_SeqBAIJ, 3001 /*84*/ 0, 3002 0, 3003 0, 3004 0, 3005 0, 3006 /*89*/ 0, 3007 0, 3008 0, 3009 0, 3010 0, 3011 /*94*/ 0, 3012 0, 3013 0, 3014 0, 3015 0, 3016 /*99*/0, 3017 0, 3018 0, 3019 0, 3020 0, 3021 /*104*/0, 3022 MatRealPart_SeqBAIJ, 3023 MatImaginaryPart_SeqBAIJ, 3024 0, 3025 0, 3026 /*109*/0, 3027 0, 3028 0, 3029 0, 3030 MatMissingDiagonal_SeqBAIJ, 3031 /*114*/0, 3032 0, 3033 0, 3034 0, 3035 0, 3036 /*119*/0, 3037 0, 3038 MatMultHermitianTranspose_SeqBAIJ, 3039 MatMultHermitianTransposeAdd_SeqBAIJ, 3040 0, 3041 }; 3042 3043 EXTERN_C_BEGIN 3044 #undef __FUNCT__ 3045 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3046 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3047 { 3048 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3049 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3050 PetscErrorCode ierr; 3051 3052 PetscFunctionBegin; 3053 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3054 3055 /* allocate space for values if not already there */ 3056 if (!aij->saved_values) { 3057 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3058 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3059 } 3060 3061 /* copy values over */ 3062 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3063 PetscFunctionReturn(0); 3064 } 3065 EXTERN_C_END 3066 3067 EXTERN_C_BEGIN 3068 #undef __FUNCT__ 3069 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3070 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3071 { 3072 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3073 PetscErrorCode ierr; 3074 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3075 3076 PetscFunctionBegin; 3077 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3078 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3079 3080 /* copy values over */ 3081 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3082 PetscFunctionReturn(0); 3083 } 3084 EXTERN_C_END 3085 3086 EXTERN_C_BEGIN 3087 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3088 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3089 EXTERN_C_END 3090 3091 EXTERN_C_BEGIN 3092 #undef __FUNCT__ 3093 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3094 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3095 { 3096 Mat_SeqBAIJ *b; 3097 PetscErrorCode ierr; 3098 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3099 PetscBool flg,skipallocation = PETSC_FALSE; 3100 3101 PetscFunctionBegin; 3102 3103 if (nz == MAT_SKIP_ALLOCATION) { 3104 skipallocation = PETSC_TRUE; 3105 nz = 0; 3106 } 3107 3108 if (bs < 0) { 3109 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3110 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3111 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3112 bs = PetscAbs(bs); 3113 } 3114 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3115 bs = newbs; 3116 3117 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3118 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3119 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3120 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3121 3122 B->preallocated = PETSC_TRUE; 3123 3124 mbs = B->rmap->n/bs; 3125 nbs = B->cmap->n/bs; 3126 bs2 = bs*bs; 3127 3128 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3129 3130 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3131 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3132 if (nnz) { 3133 for (i=0; i<mbs; i++) { 3134 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3135 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3136 } 3137 } 3138 3139 b = (Mat_SeqBAIJ*)B->data; 3140 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3141 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3142 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3143 3144 if (!flg) { 3145 switch (bs) { 3146 case 1: 3147 B->ops->mult = MatMult_SeqBAIJ_1; 3148 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3149 B->ops->sor = MatSOR_SeqBAIJ_1; 3150 break; 3151 case 2: 3152 B->ops->mult = MatMult_SeqBAIJ_2; 3153 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3154 B->ops->sor = MatSOR_SeqBAIJ_2; 3155 break; 3156 case 3: 3157 B->ops->mult = MatMult_SeqBAIJ_3; 3158 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3159 B->ops->sor = MatSOR_SeqBAIJ_3; 3160 break; 3161 case 4: 3162 B->ops->mult = MatMult_SeqBAIJ_4; 3163 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3164 B->ops->sor = MatSOR_SeqBAIJ_4; 3165 break; 3166 case 5: 3167 B->ops->mult = MatMult_SeqBAIJ_5; 3168 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3169 B->ops->sor = MatSOR_SeqBAIJ_5; 3170 break; 3171 case 6: 3172 B->ops->mult = MatMult_SeqBAIJ_6; 3173 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3174 B->ops->sor = MatSOR_SeqBAIJ_6; 3175 break; 3176 case 7: 3177 B->ops->mult = MatMult_SeqBAIJ_7; 3178 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3179 B->ops->sor = MatSOR_SeqBAIJ_7; 3180 break; 3181 case 15: 3182 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3183 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3184 B->ops->sor = MatSOR_SeqBAIJ_N; 3185 break; 3186 default: 3187 B->ops->mult = MatMult_SeqBAIJ_N; 3188 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3189 B->ops->sor = MatSOR_SeqBAIJ_N; 3190 break; 3191 } 3192 } 3193 B->rmap->bs = bs; 3194 b->mbs = mbs; 3195 b->nbs = nbs; 3196 if (!skipallocation) { 3197 if (!b->imax) { 3198 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3199 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3200 b->free_imax_ilen = PETSC_TRUE; 3201 } 3202 /* b->ilen will count nonzeros in each block row so far. */ 3203 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3204 if (!nnz) { 3205 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3206 else if (nz < 0) nz = 1; 3207 for (i=0; i<mbs; i++) b->imax[i] = nz; 3208 nz = nz*mbs; 3209 } else { 3210 nz = 0; 3211 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3212 } 3213 3214 /* allocate the matrix space */ 3215 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3216 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3217 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3218 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3219 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3220 b->singlemalloc = PETSC_TRUE; 3221 b->i[0] = 0; 3222 for (i=1; i<mbs+1; i++) { 3223 b->i[i] = b->i[i-1] + b->imax[i-1]; 3224 } 3225 b->free_a = PETSC_TRUE; 3226 b->free_ij = PETSC_TRUE; 3227 } else { 3228 b->free_a = PETSC_FALSE; 3229 b->free_ij = PETSC_FALSE; 3230 } 3231 3232 B->rmap->bs = bs; 3233 b->bs2 = bs2; 3234 b->mbs = mbs; 3235 b->nz = 0; 3236 b->maxnz = nz; 3237 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3238 PetscFunctionReturn(0); 3239 } 3240 EXTERN_C_END 3241 3242 EXTERN_C_BEGIN 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3245 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3246 { 3247 PetscInt i,m,nz,nz_max=0,*nnz; 3248 PetscScalar *values=0; 3249 PetscErrorCode ierr; 3250 3251 PetscFunctionBegin; 3252 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3253 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3254 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3255 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3256 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3257 m = B->rmap->n/bs; 3258 3259 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3260 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3261 for(i=0; i<m; i++) { 3262 nz = ii[i+1]- ii[i]; 3263 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3264 nz_max = PetscMax(nz_max, nz); 3265 nnz[i] = nz; 3266 } 3267 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3268 ierr = PetscFree(nnz);CHKERRQ(ierr); 3269 3270 values = (PetscScalar*)V; 3271 if (!values) { 3272 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3273 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3274 } 3275 for (i=0; i<m; i++) { 3276 PetscInt ncols = ii[i+1] - ii[i]; 3277 const PetscInt *icols = jj + ii[i]; 3278 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3279 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3280 } 3281 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3282 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3283 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3284 3285 PetscFunctionReturn(0); 3286 } 3287 EXTERN_C_END 3288 3289 3290 EXTERN_C_BEGIN 3291 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3292 #if defined(PETSC_HAVE_MUMPS) 3293 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3294 #endif 3295 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3296 EXTERN_C_END 3297 3298 /*MC 3299 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3300 block sparse compressed row format. 3301 3302 Options Database Keys: 3303 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3304 3305 Level: beginner 3306 3307 .seealso: MatCreateSeqBAIJ() 3308 M*/ 3309 3310 EXTERN_C_BEGIN 3311 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3312 EXTERN_C_END 3313 3314 EXTERN_C_BEGIN 3315 #undef __FUNCT__ 3316 #define __FUNCT__ "MatCreate_SeqBAIJ" 3317 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3318 { 3319 PetscErrorCode ierr; 3320 PetscMPIInt size; 3321 Mat_SeqBAIJ *b; 3322 3323 PetscFunctionBegin; 3324 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3325 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3326 3327 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3328 B->data = (void*)b; 3329 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3330 b->row = 0; 3331 b->col = 0; 3332 b->icol = 0; 3333 b->reallocs = 0; 3334 b->saved_values = 0; 3335 3336 b->roworiented = PETSC_TRUE; 3337 b->nonew = 0; 3338 b->diag = 0; 3339 b->solve_work = 0; 3340 b->mult_work = 0; 3341 B->spptr = 0; 3342 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3343 b->keepnonzeropattern = PETSC_FALSE; 3344 b->xtoy = 0; 3345 b->XtoY = 0; 3346 B->same_nonzero = PETSC_FALSE; 3347 3348 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3349 "MatGetFactorAvailable_seqbaij_petsc", 3350 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3352 "MatGetFactor_seqbaij_petsc", 3353 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3354 #if defined(PETSC_HAVE_MUMPS) 3355 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3356 #endif 3357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3358 "MatInvertBlockDiagonal_SeqBAIJ", 3359 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3361 "MatStoreValues_SeqBAIJ", 3362 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3364 "MatRetrieveValues_SeqBAIJ", 3365 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3367 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3368 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3370 "MatConvert_SeqBAIJ_SeqAIJ", 3371 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3373 "MatConvert_SeqBAIJ_SeqSBAIJ", 3374 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3376 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3377 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3379 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3380 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3381 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3382 "MatConvert_SeqBAIJ_SeqBSTRM", 3383 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3384 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3385 PetscFunctionReturn(0); 3386 } 3387 EXTERN_C_END 3388 3389 #undef __FUNCT__ 3390 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3391 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3392 { 3393 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3394 PetscErrorCode ierr; 3395 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3396 3397 PetscFunctionBegin; 3398 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3399 3400 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3401 c->imax = a->imax; 3402 c->ilen = a->ilen; 3403 c->free_imax_ilen = PETSC_FALSE; 3404 } else { 3405 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3406 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3407 for (i=0; i<mbs; i++) { 3408 c->imax[i] = a->imax[i]; 3409 c->ilen[i] = a->ilen[i]; 3410 } 3411 c->free_imax_ilen = PETSC_TRUE; 3412 } 3413 3414 /* allocate the matrix space */ 3415 if (mallocmatspace){ 3416 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3417 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3418 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3419 c->singlemalloc = PETSC_FALSE; 3420 c->free_ij = PETSC_FALSE; 3421 c->i = a->i; 3422 c->j = a->j; 3423 c->parent = A; 3424 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3425 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3426 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3427 } else { 3428 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3429 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3430 c->singlemalloc = PETSC_TRUE; 3431 c->free_ij = PETSC_TRUE; 3432 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3433 if (mbs > 0) { 3434 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3435 if (cpvalues == MAT_COPY_VALUES) { 3436 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3437 } else { 3438 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3439 } 3440 } 3441 } 3442 } 3443 3444 c->roworiented = a->roworiented; 3445 c->nonew = a->nonew; 3446 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3447 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3448 c->bs2 = a->bs2; 3449 c->mbs = a->mbs; 3450 c->nbs = a->nbs; 3451 3452 if (a->diag) { 3453 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3454 c->diag = a->diag; 3455 c->free_diag = PETSC_FALSE; 3456 } else { 3457 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3458 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3459 for (i=0; i<mbs; i++) { 3460 c->diag[i] = a->diag[i]; 3461 } 3462 c->free_diag = PETSC_TRUE; 3463 } 3464 } else c->diag = 0; 3465 c->nz = a->nz; 3466 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3467 c->solve_work = 0; 3468 c->mult_work = 0; 3469 c->free_a = PETSC_TRUE; 3470 c->free_ij = PETSC_TRUE; 3471 C->preallocated = PETSC_TRUE; 3472 C->assembled = PETSC_TRUE; 3473 3474 c->compressedrow.use = a->compressedrow.use; 3475 c->compressedrow.nrows = a->compressedrow.nrows; 3476 c->compressedrow.check = a->compressedrow.check; 3477 if (a->compressedrow.use){ 3478 i = a->compressedrow.nrows; 3479 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3480 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3481 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3482 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3483 } else { 3484 c->compressedrow.use = PETSC_FALSE; 3485 c->compressedrow.i = PETSC_NULL; 3486 c->compressedrow.rindex = PETSC_NULL; 3487 } 3488 C->same_nonzero = A->same_nonzero; 3489 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3490 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3496 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3497 { 3498 PetscErrorCode ierr; 3499 3500 PetscFunctionBegin; 3501 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3502 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3503 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3504 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatLoad_SeqBAIJ" 3510 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3511 { 3512 Mat_SeqBAIJ *a; 3513 PetscErrorCode ierr; 3514 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3515 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3516 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3517 PetscInt *masked,nmask,tmp,bs2,ishift; 3518 PetscMPIInt size; 3519 int fd; 3520 PetscScalar *aa; 3521 MPI_Comm comm = ((PetscObject)viewer)->comm; 3522 3523 PetscFunctionBegin; 3524 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3525 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3526 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3527 bs2 = bs*bs; 3528 3529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3530 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3531 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3532 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3533 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3534 M = header[1]; N = header[2]; nz = header[3]; 3535 3536 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3537 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3538 3539 /* 3540 This code adds extra rows to make sure the number of rows is 3541 divisible by the blocksize 3542 */ 3543 mbs = M/bs; 3544 extra_rows = bs - M + bs*(mbs); 3545 if (extra_rows == bs) extra_rows = 0; 3546 else mbs++; 3547 if (extra_rows) { 3548 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3549 } 3550 3551 /* Set global sizes if not already set */ 3552 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3553 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3554 } else { /* Check if the matrix global sizes are correct */ 3555 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3556 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3557 } 3558 3559 /* read in row lengths */ 3560 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3561 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3562 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3563 3564 /* read in column indices */ 3565 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3566 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3567 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3568 3569 /* loop over row lengths determining block row lengths */ 3570 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3571 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3572 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3573 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3574 rowcount = 0; 3575 nzcount = 0; 3576 for (i=0; i<mbs; i++) { 3577 nmask = 0; 3578 for (j=0; j<bs; j++) { 3579 kmax = rowlengths[rowcount]; 3580 for (k=0; k<kmax; k++) { 3581 tmp = jj[nzcount++]/bs; 3582 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3583 } 3584 rowcount++; 3585 } 3586 browlengths[i] += nmask; 3587 /* zero out the mask elements we set */ 3588 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3589 } 3590 3591 /* Do preallocation */ 3592 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3593 a = (Mat_SeqBAIJ*)newmat->data; 3594 3595 /* set matrix "i" values */ 3596 a->i[0] = 0; 3597 for (i=1; i<= mbs; i++) { 3598 a->i[i] = a->i[i-1] + browlengths[i-1]; 3599 a->ilen[i-1] = browlengths[i-1]; 3600 } 3601 a->nz = 0; 3602 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3603 3604 /* read in nonzero values */ 3605 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3606 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3607 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3608 3609 /* set "a" and "j" values into matrix */ 3610 nzcount = 0; jcount = 0; 3611 for (i=0; i<mbs; i++) { 3612 nzcountb = nzcount; 3613 nmask = 0; 3614 for (j=0; j<bs; j++) { 3615 kmax = rowlengths[i*bs+j]; 3616 for (k=0; k<kmax; k++) { 3617 tmp = jj[nzcount++]/bs; 3618 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3619 } 3620 } 3621 /* sort the masked values */ 3622 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3623 3624 /* set "j" values into matrix */ 3625 maskcount = 1; 3626 for (j=0; j<nmask; j++) { 3627 a->j[jcount++] = masked[j]; 3628 mask[masked[j]] = maskcount++; 3629 } 3630 /* set "a" values into matrix */ 3631 ishift = bs2*a->i[i]; 3632 for (j=0; j<bs; j++) { 3633 kmax = rowlengths[i*bs+j]; 3634 for (k=0; k<kmax; k++) { 3635 tmp = jj[nzcountb]/bs ; 3636 block = mask[tmp] - 1; 3637 point = jj[nzcountb] - bs*tmp; 3638 idx = ishift + bs2*block + j + bs*point; 3639 a->a[idx] = (MatScalar)aa[nzcountb++]; 3640 } 3641 } 3642 /* zero out the mask elements we set */ 3643 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3644 } 3645 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3646 3647 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3648 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3649 ierr = PetscFree(aa);CHKERRQ(ierr); 3650 ierr = PetscFree(jj);CHKERRQ(ierr); 3651 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3652 3653 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3656 PetscFunctionReturn(0); 3657 } 3658 3659 #undef __FUNCT__ 3660 #define __FUNCT__ "MatCreateSeqBAIJ" 3661 /*@C 3662 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3663 compressed row) format. For good matrix assembly performance the 3664 user should preallocate the matrix storage by setting the parameter nz 3665 (or the array nnz). By setting these parameters accurately, performance 3666 during matrix assembly can be increased by more than a factor of 50. 3667 3668 Collective on MPI_Comm 3669 3670 Input Parameters: 3671 + comm - MPI communicator, set to PETSC_COMM_SELF 3672 . bs - size of block 3673 . m - number of rows 3674 . n - number of columns 3675 . nz - number of nonzero blocks per block row (same for all rows) 3676 - nnz - array containing the number of nonzero blocks in the various block rows 3677 (possibly different for each block row) or PETSC_NULL 3678 3679 Output Parameter: 3680 . A - the matrix 3681 3682 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3683 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3684 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3685 3686 Options Database Keys: 3687 . -mat_no_unroll - uses code that does not unroll the loops in the 3688 block calculations (much slower) 3689 . -mat_block_size - size of the blocks to use 3690 3691 Level: intermediate 3692 3693 Notes: 3694 The number of rows and columns must be divisible by blocksize. 3695 3696 If the nnz parameter is given then the nz parameter is ignored 3697 3698 A nonzero block is any block that as 1 or more nonzeros in it 3699 3700 The block AIJ format is fully compatible with standard Fortran 77 3701 storage. That is, the stored row and column indices can begin at 3702 either one (as in Fortran) or zero. See the users' manual for details. 3703 3704 Specify the preallocated storage with either nz or nnz (not both). 3705 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3706 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3707 matrices. 3708 3709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3710 @*/ 3711 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3712 { 3713 PetscErrorCode ierr; 3714 3715 PetscFunctionBegin; 3716 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3717 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3718 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3719 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3720 PetscFunctionReturn(0); 3721 } 3722 3723 #undef __FUNCT__ 3724 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3725 /*@C 3726 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3727 per row in the matrix. For good matrix assembly performance the 3728 user should preallocate the matrix storage by setting the parameter nz 3729 (or the array nnz). By setting these parameters accurately, performance 3730 during matrix assembly can be increased by more than a factor of 50. 3731 3732 Collective on MPI_Comm 3733 3734 Input Parameters: 3735 + A - the matrix 3736 . bs - size of block 3737 . nz - number of block nonzeros per block row (same for all rows) 3738 - nnz - array containing the number of block nonzeros in the various block rows 3739 (possibly different for each block row) or PETSC_NULL 3740 3741 Options Database Keys: 3742 . -mat_no_unroll - uses code that does not unroll the loops in the 3743 block calculations (much slower) 3744 . -mat_block_size - size of the blocks to use 3745 3746 Level: intermediate 3747 3748 Notes: 3749 If the nnz parameter is given then the nz parameter is ignored 3750 3751 You can call MatGetInfo() to get information on how effective the preallocation was; 3752 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3753 You can also run with the option -info and look for messages with the string 3754 malloc in them to see if additional memory allocation was needed. 3755 3756 The block AIJ format is fully compatible with standard Fortran 77 3757 storage. That is, the stored row and column indices can begin at 3758 either one (as in Fortran) or zero. See the users' manual for details. 3759 3760 Specify the preallocated storage with either nz or nnz (not both). 3761 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3762 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3763 3764 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3765 @*/ 3766 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3767 { 3768 PetscErrorCode ierr; 3769 3770 PetscFunctionBegin; 3771 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3772 PetscFunctionReturn(0); 3773 } 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3777 /*@C 3778 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3779 (the default sequential PETSc format). 3780 3781 Collective on MPI_Comm 3782 3783 Input Parameters: 3784 + A - the matrix 3785 . i - the indices into j for the start of each local row (starts with zero) 3786 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3787 - v - optional values in the matrix 3788 3789 Level: developer 3790 3791 .keywords: matrix, aij, compressed row, sparse 3792 3793 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3794 @*/ 3795 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3796 { 3797 PetscErrorCode ierr; 3798 3799 PetscFunctionBegin; 3800 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3801 PetscFunctionReturn(0); 3802 } 3803 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3807 /*@ 3808 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3809 3810 Collective on MPI_Comm 3811 3812 Input Parameters: 3813 + comm - must be an MPI communicator of size 1 3814 . bs - size of block 3815 . m - number of rows 3816 . n - number of columns 3817 . i - row indices 3818 . j - column indices 3819 - a - matrix values 3820 3821 Output Parameter: 3822 . mat - the matrix 3823 3824 Level: advanced 3825 3826 Notes: 3827 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3828 once the matrix is destroyed 3829 3830 You cannot set new nonzero locations into this matrix, that will generate an error. 3831 3832 The i and j indices are 0 based 3833 3834 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3835 3836 3837 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3838 3839 @*/ 3840 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3841 { 3842 PetscErrorCode ierr; 3843 PetscInt ii; 3844 Mat_SeqBAIJ *baij; 3845 3846 PetscFunctionBegin; 3847 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3848 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3849 3850 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3851 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3852 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3853 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3854 baij = (Mat_SeqBAIJ*)(*mat)->data; 3855 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3856 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3857 3858 baij->i = i; 3859 baij->j = j; 3860 baij->a = a; 3861 baij->singlemalloc = PETSC_FALSE; 3862 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3863 baij->free_a = PETSC_FALSE; 3864 baij->free_ij = PETSC_FALSE; 3865 3866 for (ii=0; ii<m; ii++) { 3867 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3868 #if defined(PETSC_USE_DEBUG) 3869 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3870 #endif 3871 } 3872 #if defined(PETSC_USE_DEBUG) 3873 for (ii=0; ii<baij->i[m]; ii++) { 3874 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3875 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3876 } 3877 #endif 3878 3879 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3881 PetscFunctionReturn(0); 3882 } 3883