1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 12 /*@ 13 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 14 15 Collective on Mat 16 17 Input Parameters: 18 . mat - the matrix 19 20 Level: advanced 21 @*/ 22 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 23 { 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 28 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 29 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 30 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 31 PetscFunctionReturn(0); 32 } 33 34 EXTERN_C_BEGIN 35 #undef __FUNCT__ 36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 37 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 40 PetscErrorCode ierr; 41 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 42 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 43 PetscReal shift = 0.0; 44 45 PetscFunctionBegin; 46 if (a->idiagvalid) PetscFunctionReturn(0); 47 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 48 diag_offset = a->diag; 49 if (!a->idiag) { 50 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 51 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 52 } 53 diag = a->idiag; 54 mdiag = a->idiag+bs2*mbs; 55 /* factor and invert each block */ 56 switch (bs){ 57 case 1: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 1*diag_offset[i]; 60 diag[0] = odiag[0]; 61 mdiag[0] = odiag[0]; 62 diag[0] = 1.0 / (diag[0] + shift); 63 diag += 1; 64 mdiag += 1; 65 } 66 break; 67 case 2: 68 for (i=0; i<mbs; i++) { 69 odiag = v + 4*diag_offset[i]; 70 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 71 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 72 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 73 diag += 4; 74 mdiag += 4; 75 } 76 break; 77 case 3: 78 for (i=0; i<mbs; i++) { 79 odiag = v + 9*diag_offset[i]; 80 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 81 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 82 diag[8] = odiag[8]; 83 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 84 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 85 mdiag[8] = odiag[8]; 86 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 87 diag += 9; 88 mdiag += 9; 89 } 90 break; 91 case 4: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 16*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 97 diag += 16; 98 mdiag += 16; 99 } 100 break; 101 case 5: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 25*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 107 diag += 25; 108 mdiag += 25; 109 } 110 break; 111 case 6: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 36*diag_offset[i]; 114 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 127 diag += 49; 128 mdiag += 49; 129 } 130 break; 131 default: 132 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 133 for (i=0; i<mbs; i++) { 134 odiag = v + bs2*diag_offset[i]; 135 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 EXTERN_C_END 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 151 { 152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 153 PetscScalar *x,x1,s1; 154 const PetscScalar *b; 155 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,j; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 its = its*lits; 163 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 164 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 165 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 166 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 167 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 168 169 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 170 171 diag = a->diag; 172 idiag = a->idiag; 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 178 x[0] = b[0]*idiag[0]; 179 i2 = 1; 180 idiag += 1; 181 for (i=1; i<m; i++) { 182 v = aa + ai[i]; 183 vi = aj + ai[i]; 184 nz = diag[i] - ai[i]; 185 s1 = b[i2]; 186 for (j=0; j<nz; j++) { 187 s1 -= v[j]*x[vi[j]]; 188 } 189 x[i2] = idiag[0]*s1; 190 idiag += 1; 191 i2 += 1; 192 } 193 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 194 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 195 } 196 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 197 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 198 i2 = 0; 199 mdiag = a->idiag+a->mbs; 200 for (i=0; i<m; i++) { 201 x1 = x[i2]; 202 x[i2] = mdiag[0]*x1; 203 mdiag += 1; 204 i2 += 1; 205 } 206 ierr = PetscLogFlops(m);CHKERRQ(ierr); 207 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 208 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 209 } 210 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 211 idiag = a->idiag+a->mbs - 1; 212 i2 = m - 1; 213 x1 = x[i2]; 214 x[i2] = idiag[0]*x1; 215 idiag -= 1; 216 i2 -= 1; 217 for (i=m-2; i>=0; i--) { 218 v = aa + (diag[i]+1); 219 vi = aj + diag[i] + 1; 220 nz = ai[i+1] - diag[i] - 1; 221 s1 = x[i2]; 222 for (j=0; j<nz; j++) { 223 s1 -= v[j]*x[vi[j]]; 224 } 225 x[i2] = idiag[0]*s1; 226 idiag -= 1; 227 i2 -= 1; 228 } 229 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 230 } 231 } else { 232 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 233 } 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 PetscScalar *x,x1,x2,s1,s2; 245 const PetscScalar *b; 246 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 247 PetscErrorCode ierr; 248 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 249 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 250 251 PetscFunctionBegin; 252 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 253 its = its*lits; 254 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 255 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 256 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 257 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 258 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 259 260 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 261 262 diag = a->diag; 263 idiag = a->idiag; 264 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 266 267 if (flag & SOR_ZERO_INITIAL_GUESS) { 268 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 269 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 270 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 271 i2 = 2; 272 idiag += 4; 273 for (i=1; i<m; i++) { 274 v = aa + 4*ai[i]; 275 vi = aj + ai[i]; 276 nz = diag[i] - ai[i]; 277 s1 = b[i2]; s2 = b[i2+1]; 278 for (j=0; j<nz; j++) { 279 idx = 2*vi[j]; 280 it = 4*j; 281 x1 = x[idx]; x2 = x[1+idx]; 282 s1 -= v[it]*x1 + v[it+2]*x2; 283 s2 -= v[it+1]*x1 + v[it+3]*x2; 284 } 285 x[i2] = idiag[0]*s1 + idiag[2]*s2; 286 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 287 idiag += 4; 288 i2 += 2; 289 } 290 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 291 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 292 } 293 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 294 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 295 i2 = 0; 296 mdiag = a->idiag+4*a->mbs; 297 for (i=0; i<m; i++) { 298 x1 = x[i2]; x2 = x[i2+1]; 299 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 300 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 301 mdiag += 4; 302 i2 += 2; 303 } 304 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 305 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 306 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 307 } 308 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 309 idiag = a->idiag+4*a->mbs - 4; 310 i2 = 2*m - 2; 311 x1 = x[i2]; x2 = x[i2+1]; 312 x[i2] = idiag[0]*x1 + idiag[2]*x2; 313 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 314 idiag -= 4; 315 i2 -= 2; 316 for (i=m-2; i>=0; i--) { 317 v = aa + 4*(diag[i]+1); 318 vi = aj + diag[i] + 1; 319 nz = ai[i+1] - diag[i] - 1; 320 s1 = x[i2]; s2 = x[i2+1]; 321 for (j=0; j<nz; j++) { 322 idx = 2*vi[j]; 323 it = 4*j; 324 x1 = x[idx]; x2 = x[1+idx]; 325 s1 -= v[it]*x1 + v[it+2]*x2; 326 s2 -= v[it+1]*x1 + v[it+3]*x2; 327 } 328 x[i2] = idiag[0]*s1 + idiag[2]*s2; 329 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 330 idiag -= 4; 331 i2 -= 2; 332 } 333 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 334 } 335 } else { 336 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 337 } 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 346 { 347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 348 PetscScalar *x,x1,x2,x3,s1,s2,s3; 349 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 350 const PetscScalar *b; 351 PetscErrorCode ierr; 352 PetscInt m = a->mbs,i,i2,nz,idx; 353 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 354 355 PetscFunctionBegin; 356 its = its*lits; 357 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 358 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 359 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 360 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 361 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 362 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 363 364 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 365 366 diag = a->diag; 367 idiag = a->idiag; 368 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 369 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 370 371 if (flag & SOR_ZERO_INITIAL_GUESS) { 372 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 373 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 374 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 375 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 376 i2 = 3; 377 idiag += 9; 378 for (i=1; i<m; i++) { 379 v = aa + 9*ai[i]; 380 vi = aj + ai[i]; 381 nz = diag[i] - ai[i]; 382 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 383 while (nz--) { 384 idx = 3*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 386 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 387 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 388 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 389 v += 9; 390 } 391 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 392 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 393 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 394 idiag += 9; 395 i2 += 3; 396 } 397 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 398 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 399 } 400 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 401 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 402 i2 = 0; 403 mdiag = a->idiag+9*a->mbs; 404 for (i=0; i<m; i++) { 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 406 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 407 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 408 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 409 mdiag += 9; 410 i2 += 3; 411 } 412 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 413 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 414 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 415 } 416 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 417 idiag = a->idiag+9*a->mbs - 9; 418 i2 = 3*m - 3; 419 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 420 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 421 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 422 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 423 idiag -= 9; 424 i2 -= 3; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 9*(diag[i]+1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 430 while (nz--) { 431 idx = 3*(*vi++); 432 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 433 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 434 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 435 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 436 v += 9; 437 } 438 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 439 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 440 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 441 idiag -= 9; 442 i2 -= 3; 443 } 444 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 445 } 446 } else { 447 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 448 } 449 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 457 { 458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 459 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 460 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 461 const PetscScalar *b; 462 PetscErrorCode ierr; 463 PetscInt m = a->mbs,i,i2,nz,idx; 464 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 465 466 PetscFunctionBegin; 467 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 468 its = its*lits; 469 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 470 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 471 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 472 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 473 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 474 475 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 476 477 diag = a->diag; 478 idiag = a->idiag; 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 481 482 if (flag & SOR_ZERO_INITIAL_GUESS) { 483 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 484 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 485 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 486 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 487 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 488 i2 = 4; 489 idiag += 16; 490 for (i=1; i<m; i++) { 491 v = aa + 16*ai[i]; 492 vi = aj + ai[i]; 493 nz = diag[i] - ai[i]; 494 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 495 while (nz--) { 496 idx = 4*(*vi++); 497 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 498 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 499 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 500 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 501 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 502 v += 16; 503 } 504 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 505 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 506 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 507 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 508 idiag += 16; 509 i2 += 4; 510 } 511 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 512 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 513 } 514 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 515 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 516 i2 = 0; 517 mdiag = a->idiag+16*a->mbs; 518 for (i=0; i<m; i++) { 519 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 520 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 521 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 522 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 523 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 524 mdiag += 16; 525 i2 += 4; 526 } 527 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 528 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 529 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 530 } 531 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 532 idiag = a->idiag+16*a->mbs - 16; 533 i2 = 4*m - 4; 534 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 535 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 536 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 537 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 538 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 539 idiag -= 16; 540 i2 -= 4; 541 for (i=m-2; i>=0; i--) { 542 v = aa + 16*(diag[i]+1); 543 vi = aj + diag[i] + 1; 544 nz = ai[i+1] - diag[i] - 1; 545 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 546 while (nz--) { 547 idx = 4*(*vi++); 548 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 549 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 550 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 551 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 552 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 553 v += 16; 554 } 555 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 556 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 557 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 558 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 559 idiag -= 16; 560 i2 -= 4; 561 } 562 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 563 } 564 } else { 565 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 566 } 567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 575 { 576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 577 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 578 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 579 const PetscScalar *b; 580 PetscErrorCode ierr; 581 PetscInt m = a->mbs,i,i2,nz,idx; 582 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 583 584 PetscFunctionBegin; 585 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 586 its = its*lits; 587 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 588 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 589 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 590 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 591 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 592 593 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 594 595 diag = a->diag; 596 idiag = a->idiag; 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 599 600 if (flag & SOR_ZERO_INITIAL_GUESS) { 601 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 602 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 603 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 604 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 605 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 606 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 607 i2 = 5; 608 idiag += 25; 609 for (i=1; i<m; i++) { 610 v = aa + 25*ai[i]; 611 vi = aj + ai[i]; 612 nz = diag[i] - ai[i]; 613 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 614 while (nz--) { 615 idx = 5*(*vi++); 616 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 617 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 618 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 619 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 620 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 621 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 622 v += 25; 623 } 624 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 625 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 626 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 627 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 628 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 629 idiag += 25; 630 i2 += 5; 631 } 632 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 633 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 634 } 635 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 636 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 637 i2 = 0; 638 mdiag = a->idiag+25*a->mbs; 639 for (i=0; i<m; i++) { 640 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 641 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 642 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 643 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 644 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 645 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 646 mdiag += 25; 647 i2 += 5; 648 } 649 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 650 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 651 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 652 } 653 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 654 idiag = a->idiag+25*a->mbs - 25; 655 i2 = 5*m - 5; 656 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 657 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 658 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 659 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 660 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 661 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 662 idiag -= 25; 663 i2 -= 5; 664 for (i=m-2; i>=0; i--) { 665 v = aa + 25*(diag[i]+1); 666 vi = aj + diag[i] + 1; 667 nz = ai[i+1] - diag[i] - 1; 668 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 672 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 673 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 674 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 675 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 676 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 677 v += 25; 678 } 679 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 680 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 681 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 682 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 683 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 684 idiag -= 25; 685 i2 -= 5; 686 } 687 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 688 } 689 } else { 690 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 691 } 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 700 { 701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 702 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 703 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 704 const PetscScalar *b; 705 PetscErrorCode ierr; 706 PetscInt m = a->mbs,i,i2,nz,idx; 707 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 708 709 PetscFunctionBegin; 710 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 711 its = its*lits; 712 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 713 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 714 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 715 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 716 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 717 718 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 719 720 diag = a->diag; 721 idiag = a->idiag; 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 724 725 if (flag & SOR_ZERO_INITIAL_GUESS) { 726 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 727 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 728 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 729 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 730 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 731 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 732 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 733 i2 = 6; 734 idiag += 36; 735 for (i=1; i<m; i++) { 736 v = aa + 36*ai[i]; 737 vi = aj + ai[i]; 738 nz = diag[i] - ai[i]; 739 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 740 while (nz--) { 741 idx = 6*(*vi++); 742 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 743 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 744 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 745 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 746 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 747 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 748 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 749 v += 36; 750 } 751 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 752 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 753 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 754 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 755 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 756 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 757 idiag += 36; 758 i2 += 6; 759 } 760 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 761 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 762 } 763 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 764 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 765 i2 = 0; 766 mdiag = a->idiag+36*a->mbs; 767 for (i=0; i<m; i++) { 768 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 769 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 770 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 771 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 772 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 773 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 774 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 775 mdiag += 36; 776 i2 += 6; 777 } 778 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 779 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 780 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 781 } 782 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 783 idiag = a->idiag+36*a->mbs - 36; 784 i2 = 6*m - 6; 785 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 786 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 787 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 788 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 789 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 790 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 791 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 792 idiag -= 36; 793 i2 -= 6; 794 for (i=m-2; i>=0; i--) { 795 v = aa + 36*(diag[i]+1); 796 vi = aj + diag[i] + 1; 797 nz = ai[i+1] - diag[i] - 1; 798 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 799 while (nz--) { 800 idx = 6*(*vi++); 801 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 802 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 803 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 804 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 805 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 806 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 807 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 808 v += 36; 809 } 810 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 811 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 812 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 813 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 814 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 815 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 816 idiag -= 36; 817 i2 -= 6; 818 } 819 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 820 } 821 } else { 822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 823 } 824 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 825 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 834 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 835 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 836 const PetscScalar *b; 837 PetscErrorCode ierr; 838 PetscInt m = a->mbs,i,i2,nz,idx; 839 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 840 841 PetscFunctionBegin; 842 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 843 its = its*lits; 844 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 845 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 846 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 847 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 848 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 849 850 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 851 852 diag = a->diag; 853 idiag = a->idiag; 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 856 857 if (flag & SOR_ZERO_INITIAL_GUESS) { 858 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 859 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 860 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 861 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 862 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 863 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 864 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 865 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 866 i2 = 7; 867 idiag += 49; 868 for (i=1; i<m; i++) { 869 v = aa + 49*ai[i]; 870 vi = aj + ai[i]; 871 nz = diag[i] - ai[i]; 872 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 873 while (nz--) { 874 idx = 7*(*vi++); 875 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 876 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 877 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 878 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 879 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 880 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 881 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 882 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 883 v += 49; 884 } 885 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 886 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 887 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 888 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 889 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 890 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 891 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 892 idiag += 49; 893 i2 += 7; 894 } 895 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 896 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 897 } 898 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 899 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 900 i2 = 0; 901 mdiag = a->idiag+49*a->mbs; 902 for (i=0; i<m; i++) { 903 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 904 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 905 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 906 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 907 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 908 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 909 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 910 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 911 mdiag += 49; 912 i2 += 7; 913 } 914 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 915 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 916 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 917 } 918 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 919 idiag = a->idiag+49*a->mbs - 49; 920 i2 = 7*m - 7; 921 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 922 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 923 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 924 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 925 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 926 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 927 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 928 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 929 idiag -= 49; 930 i2 -= 7; 931 for (i=m-2; i>=0; i--) { 932 v = aa + 49*(diag[i]+1); 933 vi = aj + diag[i] + 1; 934 nz = ai[i+1] - diag[i] - 1; 935 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 936 while (nz--) { 937 idx = 7*(*vi++); 938 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 939 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 940 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 941 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 942 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 943 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 944 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 945 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 946 v += 49; 947 } 948 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 949 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 950 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 951 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 952 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 953 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 954 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 955 idiag -= 49; 956 i2 -= 7; 957 } 958 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 959 } 960 } else { 961 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscScalar *x,*work,*w,*workt; 974 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 975 const PetscScalar *b; 976 PetscErrorCode ierr; 977 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 978 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 979 980 PetscFunctionBegin; 981 its = its*lits; 982 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 983 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 984 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 985 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 986 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 987 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 988 989 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 990 991 diag = a->diag; 992 idiag = a->idiag; 993 if (!a->mult_work) { 994 k = PetscMax(A->rmap->n,A->cmap->n); 995 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 996 } 997 work = a->mult_work; 998 if (!a->sor_work) { 999 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1000 } 1001 w = a->sor_work; 1002 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1005 1006 if (flag & SOR_ZERO_INITIAL_GUESS) { 1007 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1008 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1009 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1010 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1011 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1012 i2 = bs; 1013 idiag += bs2; 1014 for (i=1; i<m; i++) { 1015 v = aa + bs2*ai[i]; 1016 vi = aj + ai[i]; 1017 nz = diag[i] - ai[i]; 1018 1019 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1020 /* copy all rows of x that are needed into contiguous space */ 1021 workt = work; 1022 for (j=0; j<nz; j++) { 1023 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1024 workt += bs; 1025 } 1026 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1027 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1028 while (nz--) { 1029 idx = N*(*vi++); 1030 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1031 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1032 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1033 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1034 v += N2; 1035 } */ 1036 1037 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1038 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1039 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1040 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1041 1042 idiag += bs2; 1043 i2 += bs; 1044 } 1045 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1046 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1047 } 1048 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1049 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1050 i2 = 0; 1051 mdiag = a->idiag+bs2*a->mbs; 1052 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 for (i=0; i<m; i++) { 1054 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1055 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1056 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1057 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1058 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1059 1060 mdiag += bs2; 1061 i2 += bs; 1062 } 1063 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1064 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1065 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1066 } 1067 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 idiag = a->idiag+bs2*a->mbs - bs2; 1069 i2 = bs*m - bs; 1070 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1071 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1072 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1073 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1074 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1075 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1076 idiag -= bs2; 1077 i2 -= bs; 1078 for (i=m-2; i>=0; i--) { 1079 v = aa + bs2*(diag[i]+1); 1080 vi = aj + diag[i] + 1; 1081 nz = ai[i+1] - diag[i] - 1; 1082 1083 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1084 /* copy all rows of x that are needed into contiguous space */ 1085 workt = work; 1086 for (j=0; j<nz; j++) { 1087 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1088 workt += bs; 1089 } 1090 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1091 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1092 while (nz--) { 1093 idx = N*(*vi++); 1094 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1095 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1096 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1097 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1098 v += N2; 1099 } */ 1100 1101 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1102 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1103 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1104 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1105 idiag -= bs2; 1106 i2 -= bs; 1107 } 1108 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1109 } 1110 } else { 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1112 } 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* 1119 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1120 */ 1121 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1124 #define matsetvaluesblocked4_ matsetvaluesblocked4 1125 #endif 1126 1127 EXTERN_C_BEGIN 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "matsetvaluesblocked4_" 1130 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1131 { 1132 Mat A = *AA; 1133 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1134 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1135 PetscInt *ai=a->i,*ailen=a->ilen; 1136 PetscInt *aj=a->j,stepval,lastcol = -1; 1137 const PetscScalar *value = v; 1138 MatScalar *ap,*aa = a->a,*bap; 1139 1140 PetscFunctionBegin; 1141 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1142 stepval = (n-1)*4; 1143 for (k=0; k<m; k++) { /* loop over added rows */ 1144 row = im[k]; 1145 rp = aj + ai[row]; 1146 ap = aa + 16*ai[row]; 1147 nrow = ailen[row]; 1148 low = 0; 1149 high = nrow; 1150 for (l=0; l<n; l++) { /* loop over added columns */ 1151 col = in[l]; 1152 if (col <= lastcol) low = 0; else high = nrow; 1153 lastcol = col; 1154 value = v + k*(stepval+4 + l)*4; 1155 while (high-low > 7) { 1156 t = (low+high)/2; 1157 if (rp[t] > col) high = t; 1158 else low = t; 1159 } 1160 for (i=low; i<high; i++) { 1161 if (rp[i] > col) break; 1162 if (rp[i] == col) { 1163 bap = ap + 16*i; 1164 for (ii=0; ii<4; ii++,value+=stepval) { 1165 for (jj=ii; jj<16; jj+=4) { 1166 bap[jj] += *value++; 1167 } 1168 } 1169 goto noinsert2; 1170 } 1171 } 1172 N = nrow++ - 1; 1173 high++; /* added new column index thus must search to one higher than before */ 1174 /* shift up all the later entries in this row */ 1175 for (ii=N; ii>=i; ii--) { 1176 rp[ii+1] = rp[ii]; 1177 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1178 } 1179 if (N >= i) { 1180 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1181 } 1182 rp[i] = col; 1183 bap = ap + 16*i; 1184 for (ii=0; ii<4; ii++,value+=stepval) { 1185 for (jj=ii; jj<16; jj+=4) { 1186 bap[jj] = *value++; 1187 } 1188 } 1189 noinsert2:; 1190 low = i; 1191 } 1192 ailen[row] = nrow; 1193 } 1194 PetscFunctionReturnVoid(); 1195 } 1196 EXTERN_C_END 1197 1198 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1199 #define matsetvalues4_ MATSETVALUES4 1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1201 #define matsetvalues4_ matsetvalues4 1202 #endif 1203 1204 EXTERN_C_BEGIN 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSetValues4_" 1207 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1208 { 1209 Mat A = *AA; 1210 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1211 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1212 PetscInt *ai=a->i,*ailen=a->ilen; 1213 PetscInt *aj=a->j,brow,bcol; 1214 PetscInt ridx,cidx,lastcol = -1; 1215 MatScalar *ap,value,*aa=a->a,*bap; 1216 1217 PetscFunctionBegin; 1218 for (k=0; k<m; k++) { /* loop over added rows */ 1219 row = im[k]; brow = row/4; 1220 rp = aj + ai[brow]; 1221 ap = aa + 16*ai[brow]; 1222 nrow = ailen[brow]; 1223 low = 0; 1224 high = nrow; 1225 for (l=0; l<n; l++) { /* loop over added columns */ 1226 col = in[l]; bcol = col/4; 1227 ridx = row % 4; cidx = col % 4; 1228 value = v[l + k*n]; 1229 if (col <= lastcol) low = 0; else high = nrow; 1230 lastcol = col; 1231 while (high-low > 7) { 1232 t = (low+high)/2; 1233 if (rp[t] > bcol) high = t; 1234 else low = t; 1235 } 1236 for (i=low; i<high; i++) { 1237 if (rp[i] > bcol) break; 1238 if (rp[i] == bcol) { 1239 bap = ap + 16*i + 4*cidx + ridx; 1240 *bap += value; 1241 goto noinsert1; 1242 } 1243 } 1244 N = nrow++ - 1; 1245 high++; /* added new column thus must search to one higher than before */ 1246 /* shift up all the later entries in this row */ 1247 for (ii=N; ii>=i; ii--) { 1248 rp[ii+1] = rp[ii]; 1249 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1250 } 1251 if (N>=i) { 1252 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1253 } 1254 rp[i] = bcol; 1255 ap[16*i + 4*cidx + ridx] = value; 1256 noinsert1:; 1257 low = i; 1258 } 1259 ailen[brow] = nrow; 1260 } 1261 PetscFunctionReturnVoid(); 1262 } 1263 EXTERN_C_END 1264 1265 /* 1266 Checks for missing diagonals 1267 */ 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 PetscInt *diag,*jj = a->j,i; 1275 1276 PetscFunctionBegin; 1277 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1278 *missing = PETSC_FALSE; 1279 if (A->rmap->n > 0 && !jj) { 1280 *missing = PETSC_TRUE; 1281 if (d) *d = 0; 1282 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1283 } else { 1284 diag = a->diag; 1285 for (i=0; i<a->mbs; i++) { 1286 if (jj[diag[i]] != i) { 1287 *missing = PETSC_TRUE; 1288 if (d) *d = i; 1289 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1290 } 1291 } 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1301 PetscErrorCode ierr; 1302 PetscInt i,j,m = a->mbs; 1303 1304 PetscFunctionBegin; 1305 if (!a->diag) { 1306 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1307 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1308 a->free_diag = PETSC_TRUE; 1309 } 1310 for (i=0; i<m; i++) { 1311 a->diag[i] = a->i[i+1]; 1312 for (j=a->i[i]; j<a->i[i+1]; j++) { 1313 if (a->j[j] == i) { 1314 a->diag[i] = j; 1315 break; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1328 { 1329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1332 PetscInt *tia, *tja; 1333 1334 PetscFunctionBegin; 1335 *nn = n; 1336 if (!ia) PetscFunctionReturn(0); 1337 if (symmetric) { 1338 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1339 nz = tia[n]; 1340 } else { 1341 tia = a->i; tja = a->j; 1342 } 1343 1344 if (!blockcompressed && bs > 1) { 1345 (*nn) *= bs; 1346 /* malloc & create the natural set of indices */ 1347 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1348 if (n) { 1349 (*ia)[0] = 0; 1350 for (j=1; j<bs; j++) { 1351 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1352 } 1353 } 1354 1355 for (i=1; i<n; i++) { 1356 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1357 for (j=1; j<bs; j++) { 1358 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1359 } 1360 } 1361 if (n) { 1362 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1363 } 1364 1365 if (ja) { 1366 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1367 cnt = 0; 1368 for (i=0; i<n; i++) { 1369 for (j=0; j<bs; j++) { 1370 for (k=tia[i]; k<tia[i+1]; k++) { 1371 for (l=0; l<bs; l++) { 1372 (*ja)[cnt++] = bs*tja[k] + l; 1373 } 1374 } 1375 } 1376 } 1377 } 1378 1379 n *= bs; 1380 nz *= bs*bs; 1381 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1382 ierr = PetscFree(tia);CHKERRQ(ierr); 1383 ierr = PetscFree(tja);CHKERRQ(ierr); 1384 } 1385 } else if (oshift == 1) { 1386 if (symmetric) { 1387 PetscInt nz = tia[A->rmap->n/bs]; 1388 /* add 1 to i and j indices */ 1389 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1390 *ia = tia; 1391 if (ja) { 1392 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1393 *ja = tja; 1394 } 1395 } else { 1396 PetscInt nz = a->i[A->rmap->n/bs]; 1397 /* malloc space and add 1 to i and j indices */ 1398 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1399 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1400 if (ja) { 1401 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1402 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1403 } 1404 } 1405 } else { 1406 *ia = tia; 1407 if (ja) *ja = tja; 1408 } 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1416 { 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (!ia) PetscFunctionReturn(0); 1421 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1422 ierr = PetscFree(*ia);CHKERRQ(ierr); 1423 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 #if defined(PETSC_USE_LOG) 1437 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1438 #endif 1439 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1440 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1441 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1442 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1443 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1444 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1445 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1446 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1447 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1448 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1449 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1450 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1451 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1452 1453 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1454 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1455 ierr = PetscFree(A->data);CHKERRQ(ierr); 1456 1457 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1458 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1459 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1461 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1462 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1466 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1467 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1473 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 switch (op) { 1480 case MAT_ROW_ORIENTED: 1481 a->roworiented = flg; 1482 break; 1483 case MAT_KEEP_NONZERO_PATTERN: 1484 a->keepnonzeropattern = flg; 1485 break; 1486 case MAT_NEW_NONZERO_LOCATIONS: 1487 a->nonew = (flg ? 0 : 1); 1488 break; 1489 case MAT_NEW_NONZERO_LOCATION_ERR: 1490 a->nonew = (flg ? -1 : 0); 1491 break; 1492 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1493 a->nonew = (flg ? -2 : 0); 1494 break; 1495 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1496 a->nounused = (flg ? -1 : 0); 1497 break; 1498 case MAT_CHECK_COMPRESSED_ROW: 1499 a->compressedrow.check = flg; 1500 break; 1501 case MAT_NEW_DIAGONALS: 1502 case MAT_IGNORE_OFF_PROC_ENTRIES: 1503 case MAT_USE_HASH_TABLE: 1504 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1505 break; 1506 case MAT_SYMMETRIC: 1507 case MAT_STRUCTURALLY_SYMMETRIC: 1508 case MAT_HERMITIAN: 1509 case MAT_SYMMETRY_ETERNAL: 1510 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1511 break; 1512 default: 1513 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1525 MatScalar *aa,*aa_i; 1526 PetscScalar *v_i; 1527 1528 PetscFunctionBegin; 1529 bs = A->rmap->bs; 1530 ai = a->i; 1531 aj = a->j; 1532 aa = a->a; 1533 bs2 = a->bs2; 1534 1535 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1536 1537 bn = row/bs; /* Block number */ 1538 bp = row % bs; /* Block Position */ 1539 M = ai[bn+1] - ai[bn]; 1540 *nz = bs*M; 1541 1542 if (v) { 1543 *v = 0; 1544 if (*nz) { 1545 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1546 for (i=0; i<M; i++) { /* for each block in the block row */ 1547 v_i = *v + i*bs; 1548 aa_i = aa + bs2*(ai[bn] + i); 1549 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1550 } 1551 } 1552 } 1553 1554 if (idx) { 1555 *idx = 0; 1556 if (*nz) { 1557 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1558 for (i=0; i<M; i++) { /* for each block in the block row */ 1559 idx_i = *idx + i*bs; 1560 itmp = bs*aj[ai[bn] + i]; 1561 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1562 } 1563 } 1564 } 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1571 { 1572 PetscErrorCode ierr; 1573 1574 PetscFunctionBegin; 1575 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1576 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1577 PetscFunctionReturn(0); 1578 } 1579 1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1585 { 1586 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1587 Mat C; 1588 PetscErrorCode ierr; 1589 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1590 PetscInt *rows,*cols,bs2=a->bs2; 1591 MatScalar *array; 1592 1593 PetscFunctionBegin; 1594 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1595 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1596 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1597 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1598 1599 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1600 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1601 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1602 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1603 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1604 ierr = PetscFree(col);CHKERRQ(ierr); 1605 } else { 1606 C = *B; 1607 } 1608 1609 array = a->a; 1610 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1611 for (i=0; i<mbs; i++) { 1612 cols[0] = i*bs; 1613 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1614 len = ai[i+1] - ai[i]; 1615 for (j=0; j<len; j++) { 1616 rows[0] = (*aj++)*bs; 1617 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1618 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1619 array += bs2; 1620 } 1621 } 1622 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1623 1624 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1625 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1626 1627 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1628 *B = C; 1629 } else { 1630 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1631 } 1632 PetscFunctionReturn(0); 1633 } 1634 1635 EXTERN_C_BEGIN 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1638 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1639 { 1640 PetscErrorCode ierr; 1641 Mat Btrans; 1642 1643 PetscFunctionBegin; 1644 *f = PETSC_FALSE; 1645 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1646 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1647 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1648 PetscFunctionReturn(0); 1649 } 1650 EXTERN_C_END 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1654 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1655 { 1656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1657 PetscErrorCode ierr; 1658 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1659 int fd; 1660 PetscScalar *aa; 1661 FILE *file; 1662 1663 PetscFunctionBegin; 1664 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1665 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1666 col_lens[0] = MAT_FILE_CLASSID; 1667 1668 col_lens[1] = A->rmap->N; 1669 col_lens[2] = A->cmap->n; 1670 col_lens[3] = a->nz*bs2; 1671 1672 /* store lengths of each row and write (including header) to file */ 1673 count = 0; 1674 for (i=0; i<a->mbs; i++) { 1675 for (j=0; j<bs; j++) { 1676 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1677 } 1678 } 1679 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1680 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1681 1682 /* store column indices (zero start index) */ 1683 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1684 count = 0; 1685 for (i=0; i<a->mbs; i++) { 1686 for (j=0; j<bs; j++) { 1687 for (k=a->i[i]; k<a->i[i+1]; k++) { 1688 for (l=0; l<bs; l++) { 1689 jj[count++] = bs*a->j[k] + l; 1690 } 1691 } 1692 } 1693 } 1694 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1695 ierr = PetscFree(jj);CHKERRQ(ierr); 1696 1697 /* store nonzero values */ 1698 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1699 count = 0; 1700 for (i=0; i<a->mbs; i++) { 1701 for (j=0; j<bs; j++) { 1702 for (k=a->i[i]; k<a->i[i+1]; k++) { 1703 for (l=0; l<bs; l++) { 1704 aa[count++] = a->a[bs2*k + l*bs + j]; 1705 } 1706 } 1707 } 1708 } 1709 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1710 ierr = PetscFree(aa);CHKERRQ(ierr); 1711 1712 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1713 if (file) { 1714 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1715 } 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1721 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1722 { 1723 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1724 PetscErrorCode ierr; 1725 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1726 PetscViewerFormat format; 1727 1728 PetscFunctionBegin; 1729 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1730 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1731 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1732 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1733 Mat aij; 1734 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1735 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1736 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1737 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1738 PetscFunctionReturn(0); 1739 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1740 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1741 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1742 for (i=0; i<a->mbs; i++) { 1743 for (j=0; j<bs; j++) { 1744 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1745 for (k=a->i[i]; k<a->i[i+1]; k++) { 1746 for (l=0; l<bs; l++) { 1747 #if defined(PETSC_USE_COMPLEX) 1748 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1749 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1750 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1751 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1752 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1753 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1754 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1755 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1756 } 1757 #else 1758 if (a->a[bs2*k + l*bs + j] != 0.0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1760 } 1761 #endif 1762 } 1763 } 1764 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1765 } 1766 } 1767 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1768 } else { 1769 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1770 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1771 for (i=0; i<a->mbs; i++) { 1772 for (j=0; j<bs; j++) { 1773 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1774 for (k=a->i[i]; k<a->i[i+1]; k++) { 1775 for (l=0; l<bs; l++) { 1776 #if defined(PETSC_USE_COMPLEX) 1777 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1778 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1779 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1780 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1781 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1782 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1783 } else { 1784 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1785 } 1786 #else 1787 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1788 #endif 1789 } 1790 } 1791 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1792 } 1793 } 1794 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1795 } 1796 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1797 PetscFunctionReturn(0); 1798 } 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1802 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1803 { 1804 Mat A = (Mat) Aa; 1805 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1806 PetscErrorCode ierr; 1807 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1808 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1809 MatScalar *aa; 1810 PetscViewer viewer; 1811 1812 PetscFunctionBegin; 1813 1814 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1815 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1816 1817 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1818 1819 /* loop over matrix elements drawing boxes */ 1820 color = PETSC_DRAW_BLUE; 1821 for (i=0,row=0; i<mbs; i++,row+=bs) { 1822 for (j=a->i[i]; j<a->i[i+1]; j++) { 1823 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1824 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1825 aa = a->a + j*bs2; 1826 for (k=0; k<bs; k++) { 1827 for (l=0; l<bs; l++) { 1828 if (PetscRealPart(*aa++) >= 0.) continue; 1829 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1830 } 1831 } 1832 } 1833 } 1834 color = PETSC_DRAW_CYAN; 1835 for (i=0,row=0; i<mbs; i++,row+=bs) { 1836 for (j=a->i[i]; j<a->i[i+1]; j++) { 1837 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1838 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1839 aa = a->a + j*bs2; 1840 for (k=0; k<bs; k++) { 1841 for (l=0; l<bs; l++) { 1842 if (PetscRealPart(*aa++) != 0.) continue; 1843 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1844 } 1845 } 1846 } 1847 } 1848 1849 color = PETSC_DRAW_RED; 1850 for (i=0,row=0; i<mbs; i++,row+=bs) { 1851 for (j=a->i[i]; j<a->i[i+1]; j++) { 1852 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1853 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1854 aa = a->a + j*bs2; 1855 for (k=0; k<bs; k++) { 1856 for (l=0; l<bs; l++) { 1857 if (PetscRealPart(*aa++) <= 0.) continue; 1858 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1859 } 1860 } 1861 } 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1868 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1869 { 1870 PetscErrorCode ierr; 1871 PetscReal xl,yl,xr,yr,w,h; 1872 PetscDraw draw; 1873 PetscBool isnull; 1874 1875 PetscFunctionBegin; 1876 1877 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1878 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1879 1880 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1881 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1882 xr += w; yr += h; xl = -w; yl = -h; 1883 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1884 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1885 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatView_SeqBAIJ" 1891 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1892 { 1893 PetscErrorCode ierr; 1894 PetscBool iascii,isbinary,isdraw; 1895 1896 PetscFunctionBegin; 1897 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1898 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1899 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1900 if (iascii){ 1901 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1902 } else if (isbinary) { 1903 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1904 } else if (isdraw) { 1905 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1906 } else { 1907 Mat B; 1908 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1909 ierr = MatView(B,viewer);CHKERRQ(ierr); 1910 ierr = MatDestroy(&B);CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1918 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1919 { 1920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1921 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1922 PetscInt *ai = a->i,*ailen = a->ilen; 1923 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1924 MatScalar *ap,*aa = a->a; 1925 1926 PetscFunctionBegin; 1927 for (k=0; k<m; k++) { /* loop over rows */ 1928 row = im[k]; brow = row/bs; 1929 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1930 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1931 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1932 nrow = ailen[brow]; 1933 for (l=0; l<n; l++) { /* loop over columns */ 1934 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1935 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1936 col = in[l] ; 1937 bcol = col/bs; 1938 cidx = col%bs; 1939 ridx = row%bs; 1940 high = nrow; 1941 low = 0; /* assume unsorted */ 1942 while (high-low > 5) { 1943 t = (low+high)/2; 1944 if (rp[t] > bcol) high = t; 1945 else low = t; 1946 } 1947 for (i=low; i<high; i++) { 1948 if (rp[i] > bcol) break; 1949 if (rp[i] == bcol) { 1950 *v++ = ap[bs2*i+bs*cidx+ridx]; 1951 goto finished; 1952 } 1953 } 1954 *v++ = 0.0; 1955 finished:; 1956 } 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1963 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1964 { 1965 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1966 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1967 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1968 PetscErrorCode ierr; 1969 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1970 PetscBool roworiented=a->roworiented; 1971 const PetscScalar *value = v; 1972 MatScalar *ap,*aa = a->a,*bap; 1973 1974 PetscFunctionBegin; 1975 if (roworiented) { 1976 stepval = (n-1)*bs; 1977 } else { 1978 stepval = (m-1)*bs; 1979 } 1980 for (k=0; k<m; k++) { /* loop over added rows */ 1981 row = im[k]; 1982 if (row < 0) continue; 1983 #if defined(PETSC_USE_DEBUG) 1984 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1985 #endif 1986 rp = aj + ai[row]; 1987 ap = aa + bs2*ai[row]; 1988 rmax = imax[row]; 1989 nrow = ailen[row]; 1990 low = 0; 1991 high = nrow; 1992 for (l=0; l<n; l++) { /* loop over added columns */ 1993 if (in[l] < 0) continue; 1994 #if defined(PETSC_USE_DEBUG) 1995 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1996 #endif 1997 col = in[l]; 1998 if (roworiented) { 1999 value = v + (k*(stepval+bs) + l)*bs; 2000 } else { 2001 value = v + (l*(stepval+bs) + k)*bs; 2002 } 2003 if (col <= lastcol) low = 0; else high = nrow; 2004 lastcol = col; 2005 while (high-low > 7) { 2006 t = (low+high)/2; 2007 if (rp[t] > col) high = t; 2008 else low = t; 2009 } 2010 for (i=low; i<high; i++) { 2011 if (rp[i] > col) break; 2012 if (rp[i] == col) { 2013 bap = ap + bs2*i; 2014 if (roworiented) { 2015 if (is == ADD_VALUES) { 2016 for (ii=0; ii<bs; ii++,value+=stepval) { 2017 for (jj=ii; jj<bs2; jj+=bs) { 2018 bap[jj] += *value++; 2019 } 2020 } 2021 } else { 2022 for (ii=0; ii<bs; ii++,value+=stepval) { 2023 for (jj=ii; jj<bs2; jj+=bs) { 2024 bap[jj] = *value++; 2025 } 2026 } 2027 } 2028 } else { 2029 if (is == ADD_VALUES) { 2030 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2031 for (jj=0; jj<bs; jj++) { 2032 bap[jj] += value[jj]; 2033 } 2034 bap += bs; 2035 } 2036 } else { 2037 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2038 for (jj=0; jj<bs; jj++) { 2039 bap[jj] = value[jj]; 2040 } 2041 bap += bs; 2042 } 2043 } 2044 } 2045 goto noinsert2; 2046 } 2047 } 2048 if (nonew == 1) goto noinsert2; 2049 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2050 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2051 N = nrow++ - 1; high++; 2052 /* shift up all the later entries in this row */ 2053 for (ii=N; ii>=i; ii--) { 2054 rp[ii+1] = rp[ii]; 2055 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2056 } 2057 if (N >= i) { 2058 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2059 } 2060 rp[i] = col; 2061 bap = ap + bs2*i; 2062 if (roworiented) { 2063 for (ii=0; ii<bs; ii++,value+=stepval) { 2064 for (jj=ii; jj<bs2; jj+=bs) { 2065 bap[jj] = *value++; 2066 } 2067 } 2068 } else { 2069 for (ii=0; ii<bs; ii++,value+=stepval) { 2070 for (jj=0; jj<bs; jj++) { 2071 *bap++ = *value++; 2072 } 2073 } 2074 } 2075 noinsert2:; 2076 low = i; 2077 } 2078 ailen[row] = nrow; 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2085 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2086 { 2087 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2088 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2089 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2090 PetscErrorCode ierr; 2091 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2092 MatScalar *aa = a->a,*ap; 2093 PetscReal ratio=0.6; 2094 2095 PetscFunctionBegin; 2096 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2097 2098 if (m) rmax = ailen[0]; 2099 for (i=1; i<mbs; i++) { 2100 /* move each row back by the amount of empty slots (fshift) before it*/ 2101 fshift += imax[i-1] - ailen[i-1]; 2102 rmax = PetscMax(rmax,ailen[i]); 2103 if (fshift) { 2104 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2105 N = ailen[i]; 2106 for (j=0; j<N; j++) { 2107 ip[j-fshift] = ip[j]; 2108 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2109 } 2110 } 2111 ai[i] = ai[i-1] + ailen[i-1]; 2112 } 2113 if (mbs) { 2114 fshift += imax[mbs-1] - ailen[mbs-1]; 2115 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2116 } 2117 /* reset ilen and imax for each row */ 2118 for (i=0; i<mbs; i++) { 2119 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2120 } 2121 a->nz = ai[mbs]; 2122 2123 /* diagonals may have moved, so kill the diagonal pointers */ 2124 a->idiagvalid = PETSC_FALSE; 2125 if (fshift && a->diag) { 2126 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2127 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2128 a->diag = 0; 2129 } 2130 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2131 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2132 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2133 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2134 A->info.mallocs += a->reallocs; 2135 a->reallocs = 0; 2136 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2137 2138 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2139 A->same_nonzero = PETSC_TRUE; 2140 PetscFunctionReturn(0); 2141 } 2142 2143 /* 2144 This function returns an array of flags which indicate the locations of contiguous 2145 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2146 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2147 Assume: sizes should be long enough to hold all the values. 2148 */ 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2151 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2152 { 2153 PetscInt i,j,k,row; 2154 PetscBool flg; 2155 2156 PetscFunctionBegin; 2157 for (i=0,j=0; i<n; j++) { 2158 row = idx[i]; 2159 if (row%bs!=0) { /* Not the begining of a block */ 2160 sizes[j] = 1; 2161 i++; 2162 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2163 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2164 i++; 2165 } else { /* Begining of the block, so check if the complete block exists */ 2166 flg = PETSC_TRUE; 2167 for (k=1; k<bs; k++) { 2168 if (row+k != idx[i+k]) { /* break in the block */ 2169 flg = PETSC_FALSE; 2170 break; 2171 } 2172 } 2173 if (flg) { /* No break in the bs */ 2174 sizes[j] = bs; 2175 i+= bs; 2176 } else { 2177 sizes[j] = 1; 2178 i++; 2179 } 2180 } 2181 } 2182 *bs_max = j; 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2188 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2189 { 2190 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2191 PetscErrorCode ierr; 2192 PetscInt i,j,k,count,*rows; 2193 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2194 PetscScalar zero = 0.0; 2195 MatScalar *aa; 2196 const PetscScalar *xx; 2197 PetscScalar *bb; 2198 2199 PetscFunctionBegin; 2200 /* fix right hand side if needed */ 2201 if (x && b) { 2202 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2203 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2204 for (i=0; i<is_n; i++) { 2205 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2206 } 2207 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2208 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2209 } 2210 2211 /* Make a copy of the IS and sort it */ 2212 /* allocate memory for rows,sizes */ 2213 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2214 2215 /* copy IS values to rows, and sort them */ 2216 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2217 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2218 2219 if (baij->keepnonzeropattern) { 2220 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2221 bs_max = is_n; 2222 A->same_nonzero = PETSC_TRUE; 2223 } else { 2224 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2225 A->same_nonzero = PETSC_FALSE; 2226 } 2227 2228 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2229 row = rows[j]; 2230 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2231 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2232 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2233 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2234 if (diag != 0.0) { 2235 if (baij->ilen[row/bs] > 0) { 2236 baij->ilen[row/bs] = 1; 2237 baij->j[baij->i[row/bs]] = row/bs; 2238 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2239 } 2240 /* Now insert all the diagonal values for this bs */ 2241 for (k=0; k<bs; k++) { 2242 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2243 } 2244 } else { /* (diag == 0.0) */ 2245 baij->ilen[row/bs] = 0; 2246 } /* end (diag == 0.0) */ 2247 } else { /* (sizes[i] != bs) */ 2248 #if defined (PETSC_USE_DEBUG) 2249 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2250 #endif 2251 for (k=0; k<count; k++) { 2252 aa[0] = zero; 2253 aa += bs; 2254 } 2255 if (diag != 0.0) { 2256 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2257 } 2258 } 2259 } 2260 2261 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2262 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2268 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2269 { 2270 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2271 PetscErrorCode ierr; 2272 PetscInt i,j,k,count; 2273 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2274 PetscScalar zero = 0.0; 2275 MatScalar *aa; 2276 const PetscScalar *xx; 2277 PetscScalar *bb; 2278 PetscBool *zeroed,vecs = PETSC_FALSE; 2279 2280 PetscFunctionBegin; 2281 /* fix right hand side if needed */ 2282 if (x && b) { 2283 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2284 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2285 vecs = PETSC_TRUE; 2286 } 2287 A->same_nonzero = PETSC_TRUE; 2288 2289 /* zero the columns */ 2290 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2291 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2292 for (i=0; i<is_n; i++) { 2293 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2294 zeroed[is_idx[i]] = PETSC_TRUE; 2295 } 2296 for (i=0; i<A->rmap->N; i++) { 2297 if (!zeroed[i]) { 2298 row = i/bs; 2299 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2300 for (k=0; k<bs; k++) { 2301 col = bs*baij->j[j] + k; 2302 if (zeroed[col]) { 2303 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2304 if (vecs) bb[i] -= aa[0]*xx[col]; 2305 aa[0] = 0.0; 2306 } 2307 } 2308 } 2309 } else if (vecs) bb[i] = diag*xx[i]; 2310 } 2311 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2312 if (vecs) { 2313 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2314 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2315 } 2316 2317 /* zero the rows */ 2318 for (i=0; i<is_n; i++) { 2319 row = is_idx[i]; 2320 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2321 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2322 for (k=0; k<count; k++) { 2323 aa[0] = zero; 2324 aa += bs; 2325 } 2326 if (diag != 0.0) { 2327 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2328 } 2329 } 2330 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2331 PetscFunctionReturn(0); 2332 } 2333 2334 #undef __FUNCT__ 2335 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2336 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2337 { 2338 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2339 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2340 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2341 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2342 PetscErrorCode ierr; 2343 PetscInt ridx,cidx,bs2=a->bs2; 2344 PetscBool roworiented=a->roworiented; 2345 MatScalar *ap,value,*aa=a->a,*bap; 2346 2347 PetscFunctionBegin; 2348 if (v) PetscValidScalarPointer(v,6); 2349 for (k=0; k<m; k++) { /* loop over added rows */ 2350 row = im[k]; 2351 brow = row/bs; 2352 if (row < 0) continue; 2353 #if defined(PETSC_USE_DEBUG) 2354 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2355 #endif 2356 rp = aj + ai[brow]; 2357 ap = aa + bs2*ai[brow]; 2358 rmax = imax[brow]; 2359 nrow = ailen[brow]; 2360 low = 0; 2361 high = nrow; 2362 for (l=0; l<n; l++) { /* loop over added columns */ 2363 if (in[l] < 0) continue; 2364 #if defined(PETSC_USE_DEBUG) 2365 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2366 #endif 2367 col = in[l]; bcol = col/bs; 2368 ridx = row % bs; cidx = col % bs; 2369 if (roworiented) { 2370 value = v[l + k*n]; 2371 } else { 2372 value = v[k + l*m]; 2373 } 2374 if (col <= lastcol) low = 0; else high = nrow; 2375 lastcol = col; 2376 while (high-low > 7) { 2377 t = (low+high)/2; 2378 if (rp[t] > bcol) high = t; 2379 else low = t; 2380 } 2381 for (i=low; i<high; i++) { 2382 if (rp[i] > bcol) break; 2383 if (rp[i] == bcol) { 2384 bap = ap + bs2*i + bs*cidx + ridx; 2385 if (is == ADD_VALUES) *bap += value; 2386 else *bap = value; 2387 goto noinsert1; 2388 } 2389 } 2390 if (nonew == 1) goto noinsert1; 2391 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2392 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2393 N = nrow++ - 1; high++; 2394 /* shift up all the later entries in this row */ 2395 for (ii=N; ii>=i; ii--) { 2396 rp[ii+1] = rp[ii]; 2397 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2398 } 2399 if (N>=i) { 2400 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2401 } 2402 rp[i] = bcol; 2403 ap[bs2*i + bs*cidx + ridx] = value; 2404 a->nz++; 2405 noinsert1:; 2406 low = i; 2407 } 2408 ailen[brow] = nrow; 2409 } 2410 A->same_nonzero = PETSC_FALSE; 2411 PetscFunctionReturn(0); 2412 } 2413 2414 #undef __FUNCT__ 2415 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2416 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2417 { 2418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2419 Mat outA; 2420 PetscErrorCode ierr; 2421 PetscBool row_identity,col_identity; 2422 2423 PetscFunctionBegin; 2424 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2425 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2426 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2427 if (!row_identity || !col_identity) { 2428 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2429 } 2430 2431 outA = inA; 2432 inA->factortype = MAT_FACTOR_LU; 2433 2434 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2435 2436 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2437 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2438 a->row = row; 2439 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2440 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2441 a->col = col; 2442 2443 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2444 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2445 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2446 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2447 2448 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2449 if (!a->solve_work) { 2450 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2451 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2452 } 2453 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2454 2455 PetscFunctionReturn(0); 2456 } 2457 2458 EXTERN_C_BEGIN 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2461 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2462 { 2463 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2464 PetscInt i,nz,mbs; 2465 2466 PetscFunctionBegin; 2467 nz = baij->maxnz; 2468 mbs = baij->mbs; 2469 for (i=0; i<nz; i++) { 2470 baij->j[i] = indices[i]; 2471 } 2472 baij->nz = nz; 2473 for (i=0; i<mbs; i++) { 2474 baij->ilen[i] = baij->imax[i]; 2475 } 2476 PetscFunctionReturn(0); 2477 } 2478 EXTERN_C_END 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2482 /*@ 2483 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2484 in the matrix. 2485 2486 Input Parameters: 2487 + mat - the SeqBAIJ matrix 2488 - indices - the column indices 2489 2490 Level: advanced 2491 2492 Notes: 2493 This can be called if you have precomputed the nonzero structure of the 2494 matrix and want to provide it to the matrix object to improve the performance 2495 of the MatSetValues() operation. 2496 2497 You MUST have set the correct numbers of nonzeros per row in the call to 2498 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2499 2500 MUST be called before any calls to MatSetValues(); 2501 2502 @*/ 2503 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2504 { 2505 PetscErrorCode ierr; 2506 2507 PetscFunctionBegin; 2508 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2509 PetscValidPointer(indices,2); 2510 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2516 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2517 { 2518 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2519 PetscErrorCode ierr; 2520 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2521 PetscReal atmp; 2522 PetscScalar *x,zero = 0.0; 2523 MatScalar *aa; 2524 PetscInt ncols,brow,krow,kcol; 2525 2526 PetscFunctionBegin; 2527 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2528 bs = A->rmap->bs; 2529 aa = a->a; 2530 ai = a->i; 2531 aj = a->j; 2532 mbs = a->mbs; 2533 2534 ierr = VecSet(v,zero);CHKERRQ(ierr); 2535 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2536 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2537 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2538 for (i=0; i<mbs; i++) { 2539 ncols = ai[1] - ai[0]; ai++; 2540 brow = bs*i; 2541 for (j=0; j<ncols; j++){ 2542 for (kcol=0; kcol<bs; kcol++){ 2543 for (krow=0; krow<bs; krow++){ 2544 atmp = PetscAbsScalar(*aa);aa++; 2545 row = brow + krow; /* row index */ 2546 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2547 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2548 } 2549 } 2550 aj++; 2551 } 2552 } 2553 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatCopy_SeqBAIJ" 2559 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2560 { 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 /* If the two matrices have the same copy implementation, use fast copy. */ 2565 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2566 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2567 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2568 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2569 2570 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2571 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2572 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2573 } else { 2574 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2575 } 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2581 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2582 { 2583 PetscErrorCode ierr; 2584 2585 PetscFunctionBegin; 2586 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2592 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2593 { 2594 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2595 PetscFunctionBegin; 2596 *array = a->a; 2597 PetscFunctionReturn(0); 2598 } 2599 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2602 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2603 { 2604 PetscFunctionBegin; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2610 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2611 { 2612 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2613 PetscErrorCode ierr; 2614 PetscInt i,bs=Y->rmap->bs,j,bs2; 2615 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2616 2617 PetscFunctionBegin; 2618 if (str == SAME_NONZERO_PATTERN) { 2619 PetscScalar alpha = a; 2620 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2621 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2622 if (y->xtoy && y->XtoY != X) { 2623 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2624 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2625 } 2626 if (!y->xtoy) { /* get xtoy */ 2627 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2628 y->XtoY = X; 2629 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2630 } 2631 bs2 = bs*bs; 2632 for (i=0; i<x->nz; i++) { 2633 j = 0; 2634 while (j < bs2){ 2635 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2636 j++; 2637 } 2638 } 2639 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2640 } else { 2641 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2642 } 2643 PetscFunctionReturn(0); 2644 } 2645 2646 #undef __FUNCT__ 2647 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2648 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2649 { 2650 PetscInt rbs,cbs; 2651 PetscErrorCode ierr; 2652 2653 PetscFunctionBegin; 2654 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2655 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2656 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2657 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2658 PetscFunctionReturn(0); 2659 } 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2663 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2664 { 2665 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2666 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2667 MatScalar *aa = a->a; 2668 2669 PetscFunctionBegin; 2670 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2676 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2677 { 2678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2679 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2680 MatScalar *aa = a->a; 2681 2682 PetscFunctionBegin; 2683 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2691 /* 2692 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2693 */ 2694 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2695 { 2696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2697 PetscErrorCode ierr; 2698 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2699 PetscInt nz = a->i[m],row,*jj,mr,col; 2700 2701 PetscFunctionBegin; 2702 *nn = n; 2703 if (!ia) PetscFunctionReturn(0); 2704 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2705 else { 2706 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2707 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2708 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2709 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2710 jj = a->j; 2711 for (i=0; i<nz; i++) { 2712 collengths[jj[i]]++; 2713 } 2714 cia[0] = oshift; 2715 for (i=0; i<n; i++) { 2716 cia[i+1] = cia[i] + collengths[i]; 2717 } 2718 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2719 jj = a->j; 2720 for (row=0; row<m; row++) { 2721 mr = a->i[row+1] - a->i[row]; 2722 for (i=0; i<mr; i++) { 2723 col = *jj++; 2724 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2725 } 2726 } 2727 ierr = PetscFree(collengths);CHKERRQ(ierr); 2728 *ia = cia; *ja = cja; 2729 } 2730 PetscFunctionReturn(0); 2731 } 2732 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2735 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2736 { 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 if (!ia) PetscFunctionReturn(0); 2741 ierr = PetscFree(*ia);CHKERRQ(ierr); 2742 ierr = PetscFree(*ja);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 2746 #undef __FUNCT__ 2747 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2748 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2749 { 2750 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2751 PetscErrorCode ierr; 2752 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2753 PetscScalar dx,*y,*xx,*w3_array; 2754 PetscScalar *vscale_array; 2755 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2756 Vec w1=coloring->w1,w2=coloring->w2,w3; 2757 void *fctx = coloring->fctx; 2758 PetscBool flg = PETSC_FALSE; 2759 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2760 Vec x1_tmp; 2761 2762 PetscFunctionBegin; 2763 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2764 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2765 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2766 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2767 2768 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2769 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2770 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2771 if (flg) { 2772 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2773 } else { 2774 PetscBool assembled; 2775 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2776 if (assembled) { 2777 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2778 } 2779 } 2780 2781 x1_tmp = x1; 2782 if (!coloring->vscale){ 2783 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2784 } 2785 2786 /* 2787 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2788 coloring->F for the coarser grids from the finest 2789 */ 2790 if (coloring->F) { 2791 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2792 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2793 if (m1 != m2) { 2794 coloring->F = 0; 2795 } 2796 } 2797 2798 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2799 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2800 } 2801 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2802 2803 /* Set w1 = F(x1) */ 2804 if (coloring->F) { 2805 w1 = coloring->F; /* use already computed value of function */ 2806 coloring->F = 0; 2807 } else { 2808 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2809 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2810 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2811 } 2812 2813 if (!coloring->w3) { 2814 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2815 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2816 } 2817 w3 = coloring->w3; 2818 2819 CHKMEMQ; 2820 /* Compute all the local scale factors, including ghost points */ 2821 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2822 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2823 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2824 if (ctype == IS_COLORING_GHOSTED){ 2825 col_start = 0; col_end = N; 2826 } else if (ctype == IS_COLORING_GLOBAL){ 2827 xx = xx - start; 2828 vscale_array = vscale_array - start; 2829 col_start = start; col_end = N + start; 2830 } CHKMEMQ; 2831 for (col=col_start; col<col_end; col++){ 2832 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2833 if (coloring->htype[0] == 'w') { 2834 dx = 1.0 + unorm; 2835 } else { 2836 dx = xx[col]; 2837 } 2838 if (dx == 0.0) dx = 1.0; 2839 #if !defined(PETSC_USE_COMPLEX) 2840 if (dx < umin && dx >= 0.0) dx = umin; 2841 else if (dx < 0.0 && dx > -umin) dx = -umin; 2842 #else 2843 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2844 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2845 #endif 2846 dx *= epsilon; 2847 vscale_array[col] = 1.0/dx; 2848 } CHKMEMQ; 2849 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2850 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2851 if (ctype == IS_COLORING_GLOBAL){ 2852 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2853 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2854 } 2855 CHKMEMQ; 2856 if (coloring->vscaleforrow) { 2857 vscaleforrow = coloring->vscaleforrow; 2858 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2859 2860 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2861 /* 2862 Loop over each color 2863 */ 2864 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2865 for (k=0; k<coloring->ncolors; k++) { 2866 coloring->currentcolor = k; 2867 for (i=0; i<bs; i++) { 2868 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2869 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2870 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2871 /* 2872 Loop over each column associated with color 2873 adding the perturbation to the vector w3. 2874 */ 2875 for (l=0; l<coloring->ncolumns[k]; l++) { 2876 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2877 if (coloring->htype[0] == 'w') { 2878 dx = 1.0 + unorm; 2879 } else { 2880 dx = xx[col]; 2881 } 2882 if (dx == 0.0) dx = 1.0; 2883 #if !defined(PETSC_USE_COMPLEX) 2884 if (dx < umin && dx >= 0.0) dx = umin; 2885 else if (dx < 0.0 && dx > -umin) dx = -umin; 2886 #else 2887 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2888 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2889 #endif 2890 dx *= epsilon; 2891 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2892 w3_array[col] += dx; 2893 } 2894 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2895 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2896 2897 /* 2898 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2899 w2 = F(x1 + dx) - F(x1) 2900 */ 2901 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2902 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2903 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2904 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2905 2906 /* 2907 Loop over rows of vector, putting results into Jacobian matrix 2908 */ 2909 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2910 for (l=0; l<coloring->nrows[k]; l++) { 2911 row = bs*coloring->rows[k][l]; /* local row index */ 2912 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2913 for (j=0; j<bs; j++) { 2914 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2915 srows[j] = row + start + j; 2916 } 2917 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2918 } 2919 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2920 } 2921 } /* endof for each color */ 2922 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2923 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2924 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2925 ierr = PetscFree(srows);CHKERRQ(ierr); 2926 2927 coloring->currentcolor = -1; 2928 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2929 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2930 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2931 PetscFunctionReturn(0); 2932 } 2933 2934 /* -------------------------------------------------------------------*/ 2935 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2936 MatGetRow_SeqBAIJ, 2937 MatRestoreRow_SeqBAIJ, 2938 MatMult_SeqBAIJ_N, 2939 /* 4*/ MatMultAdd_SeqBAIJ_N, 2940 MatMultTranspose_SeqBAIJ, 2941 MatMultTransposeAdd_SeqBAIJ, 2942 0, 2943 0, 2944 0, 2945 /*10*/ 0, 2946 MatLUFactor_SeqBAIJ, 2947 0, 2948 0, 2949 MatTranspose_SeqBAIJ, 2950 /*15*/ MatGetInfo_SeqBAIJ, 2951 MatEqual_SeqBAIJ, 2952 MatGetDiagonal_SeqBAIJ, 2953 MatDiagonalScale_SeqBAIJ, 2954 MatNorm_SeqBAIJ, 2955 /*20*/ 0, 2956 MatAssemblyEnd_SeqBAIJ, 2957 MatSetOption_SeqBAIJ, 2958 MatZeroEntries_SeqBAIJ, 2959 /*24*/ MatZeroRows_SeqBAIJ, 2960 0, 2961 0, 2962 0, 2963 0, 2964 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2965 0, 2966 0, 2967 MatGetArray_SeqBAIJ, 2968 MatRestoreArray_SeqBAIJ, 2969 /*34*/ MatDuplicate_SeqBAIJ, 2970 0, 2971 0, 2972 MatILUFactor_SeqBAIJ, 2973 0, 2974 /*39*/ MatAXPY_SeqBAIJ, 2975 MatGetSubMatrices_SeqBAIJ, 2976 MatIncreaseOverlap_SeqBAIJ, 2977 MatGetValues_SeqBAIJ, 2978 MatCopy_SeqBAIJ, 2979 /*44*/ 0, 2980 MatScale_SeqBAIJ, 2981 0, 2982 0, 2983 MatZeroRowsColumns_SeqBAIJ, 2984 /*49*/ MatSetBlockSize_SeqBAIJ, 2985 MatGetRowIJ_SeqBAIJ, 2986 MatRestoreRowIJ_SeqBAIJ, 2987 MatGetColumnIJ_SeqBAIJ, 2988 MatRestoreColumnIJ_SeqBAIJ, 2989 /*54*/ MatFDColoringCreate_SeqAIJ, 2990 0, 2991 0, 2992 0, 2993 MatSetValuesBlocked_SeqBAIJ, 2994 /*59*/ MatGetSubMatrix_SeqBAIJ, 2995 MatDestroy_SeqBAIJ, 2996 MatView_SeqBAIJ, 2997 0, 2998 0, 2999 /*64*/ 0, 3000 0, 3001 0, 3002 0, 3003 0, 3004 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 3005 0, 3006 MatConvert_Basic, 3007 0, 3008 0, 3009 /*74*/ 0, 3010 MatFDColoringApply_BAIJ, 3011 0, 3012 0, 3013 0, 3014 /*79*/ 0, 3015 0, 3016 0, 3017 0, 3018 MatLoad_SeqBAIJ, 3019 /*84*/ 0, 3020 0, 3021 0, 3022 0, 3023 0, 3024 /*89*/ 0, 3025 0, 3026 0, 3027 0, 3028 0, 3029 /*94*/ 0, 3030 0, 3031 0, 3032 0, 3033 0, 3034 /*99*/0, 3035 0, 3036 0, 3037 0, 3038 0, 3039 /*104*/0, 3040 MatRealPart_SeqBAIJ, 3041 MatImaginaryPart_SeqBAIJ, 3042 0, 3043 0, 3044 /*109*/0, 3045 0, 3046 0, 3047 0, 3048 MatMissingDiagonal_SeqBAIJ, 3049 /*114*/0, 3050 0, 3051 0, 3052 0, 3053 0, 3054 /*119*/0, 3055 0, 3056 MatMultHermitianTranspose_SeqBAIJ, 3057 MatMultHermitianTransposeAdd_SeqBAIJ, 3058 0, 3059 }; 3060 3061 EXTERN_C_BEGIN 3062 #undef __FUNCT__ 3063 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3064 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3065 { 3066 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3067 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3068 PetscErrorCode ierr; 3069 3070 PetscFunctionBegin; 3071 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3072 3073 /* allocate space for values if not already there */ 3074 if (!aij->saved_values) { 3075 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3076 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3077 } 3078 3079 /* copy values over */ 3080 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 EXTERN_C_END 3084 3085 EXTERN_C_BEGIN 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3088 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3089 { 3090 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3091 PetscErrorCode ierr; 3092 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3093 3094 PetscFunctionBegin; 3095 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3096 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3097 3098 /* copy values over */ 3099 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3100 PetscFunctionReturn(0); 3101 } 3102 EXTERN_C_END 3103 3104 EXTERN_C_BEGIN 3105 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3106 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3107 EXTERN_C_END 3108 3109 EXTERN_C_BEGIN 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3112 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3113 { 3114 Mat_SeqBAIJ *b; 3115 PetscErrorCode ierr; 3116 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3117 PetscBool flg,skipallocation = PETSC_FALSE; 3118 3119 PetscFunctionBegin; 3120 3121 if (nz == MAT_SKIP_ALLOCATION) { 3122 skipallocation = PETSC_TRUE; 3123 nz = 0; 3124 } 3125 3126 if (bs < 0) { 3127 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3128 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3129 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3130 bs = PetscAbs(bs); 3131 } 3132 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3133 bs = newbs; 3134 3135 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3136 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3137 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3138 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3139 3140 B->preallocated = PETSC_TRUE; 3141 3142 mbs = B->rmap->n/bs; 3143 nbs = B->cmap->n/bs; 3144 bs2 = bs*bs; 3145 3146 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3147 3148 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3149 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3150 if (nnz) { 3151 for (i=0; i<mbs; i++) { 3152 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3153 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3154 } 3155 } 3156 3157 b = (Mat_SeqBAIJ*)B->data; 3158 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3159 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3160 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3161 3162 if (!flg) { 3163 switch (bs) { 3164 case 1: 3165 B->ops->mult = MatMult_SeqBAIJ_1; 3166 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3167 B->ops->sor = MatSOR_SeqBAIJ_1; 3168 break; 3169 case 2: 3170 B->ops->mult = MatMult_SeqBAIJ_2; 3171 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3172 B->ops->sor = MatSOR_SeqBAIJ_2; 3173 break; 3174 case 3: 3175 B->ops->mult = MatMult_SeqBAIJ_3; 3176 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3177 B->ops->sor = MatSOR_SeqBAIJ_3; 3178 break; 3179 case 4: 3180 B->ops->mult = MatMult_SeqBAIJ_4; 3181 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3182 B->ops->sor = MatSOR_SeqBAIJ_4; 3183 break; 3184 case 5: 3185 B->ops->mult = MatMult_SeqBAIJ_5; 3186 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3187 B->ops->sor = MatSOR_SeqBAIJ_5; 3188 break; 3189 case 6: 3190 B->ops->mult = MatMult_SeqBAIJ_6; 3191 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3192 B->ops->sor = MatSOR_SeqBAIJ_6; 3193 break; 3194 case 7: 3195 B->ops->mult = MatMult_SeqBAIJ_7; 3196 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3197 B->ops->sor = MatSOR_SeqBAIJ_7; 3198 break; 3199 case 15: 3200 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3201 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3202 B->ops->sor = MatSOR_SeqBAIJ_N; 3203 break; 3204 default: 3205 B->ops->mult = MatMult_SeqBAIJ_N; 3206 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3207 B->ops->sor = MatSOR_SeqBAIJ_N; 3208 break; 3209 } 3210 } 3211 B->rmap->bs = bs; 3212 b->mbs = mbs; 3213 b->nbs = nbs; 3214 if (!skipallocation) { 3215 if (!b->imax) { 3216 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3217 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3218 b->free_imax_ilen = PETSC_TRUE; 3219 } 3220 /* b->ilen will count nonzeros in each block row so far. */ 3221 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3222 if (!nnz) { 3223 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3224 else if (nz < 0) nz = 1; 3225 for (i=0; i<mbs; i++) b->imax[i] = nz; 3226 nz = nz*mbs; 3227 } else { 3228 nz = 0; 3229 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3230 } 3231 3232 /* allocate the matrix space */ 3233 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3234 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3235 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3236 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3237 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3238 b->singlemalloc = PETSC_TRUE; 3239 b->i[0] = 0; 3240 for (i=1; i<mbs+1; i++) { 3241 b->i[i] = b->i[i-1] + b->imax[i-1]; 3242 } 3243 b->free_a = PETSC_TRUE; 3244 b->free_ij = PETSC_TRUE; 3245 } else { 3246 b->free_a = PETSC_FALSE; 3247 b->free_ij = PETSC_FALSE; 3248 } 3249 3250 B->rmap->bs = bs; 3251 b->bs2 = bs2; 3252 b->mbs = mbs; 3253 b->nz = 0; 3254 b->maxnz = nz; 3255 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3256 PetscFunctionReturn(0); 3257 } 3258 EXTERN_C_END 3259 3260 EXTERN_C_BEGIN 3261 #undef __FUNCT__ 3262 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3263 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3264 { 3265 PetscInt i,m,nz,nz_max=0,*nnz; 3266 PetscScalar *values=0; 3267 PetscErrorCode ierr; 3268 3269 PetscFunctionBegin; 3270 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3271 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3272 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3273 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3274 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3275 m = B->rmap->n/bs; 3276 3277 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3278 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3279 for(i=0; i<m; i++) { 3280 nz = ii[i+1]- ii[i]; 3281 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3282 nz_max = PetscMax(nz_max, nz); 3283 nnz[i] = nz; 3284 } 3285 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3286 ierr = PetscFree(nnz);CHKERRQ(ierr); 3287 3288 values = (PetscScalar*)V; 3289 if (!values) { 3290 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3291 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3292 } 3293 for (i=0; i<m; i++) { 3294 PetscInt ncols = ii[i+1] - ii[i]; 3295 const PetscInt *icols = jj + ii[i]; 3296 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3297 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3298 } 3299 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3300 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302 3303 PetscFunctionReturn(0); 3304 } 3305 EXTERN_C_END 3306 3307 3308 EXTERN_C_BEGIN 3309 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3310 #if defined(PETSC_HAVE_MUMPS) 3311 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3312 #endif 3313 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3314 EXTERN_C_END 3315 3316 /*MC 3317 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3318 block sparse compressed row format. 3319 3320 Options Database Keys: 3321 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3322 3323 Level: beginner 3324 3325 .seealso: MatCreateSeqBAIJ() 3326 M*/ 3327 3328 EXTERN_C_BEGIN 3329 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3330 EXTERN_C_END 3331 3332 EXTERN_C_BEGIN 3333 #undef __FUNCT__ 3334 #define __FUNCT__ "MatCreate_SeqBAIJ" 3335 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3336 { 3337 PetscErrorCode ierr; 3338 PetscMPIInt size; 3339 Mat_SeqBAIJ *b; 3340 3341 PetscFunctionBegin; 3342 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3343 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3344 3345 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3346 B->data = (void*)b; 3347 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3348 b->row = 0; 3349 b->col = 0; 3350 b->icol = 0; 3351 b->reallocs = 0; 3352 b->saved_values = 0; 3353 3354 b->roworiented = PETSC_TRUE; 3355 b->nonew = 0; 3356 b->diag = 0; 3357 b->solve_work = 0; 3358 b->mult_work = 0; 3359 B->spptr = 0; 3360 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3361 b->keepnonzeropattern = PETSC_FALSE; 3362 b->xtoy = 0; 3363 b->XtoY = 0; 3364 B->same_nonzero = PETSC_FALSE; 3365 3366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3367 "MatGetFactorAvailable_seqbaij_petsc", 3368 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3370 "MatGetFactor_seqbaij_petsc", 3371 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3372 #if defined(PETSC_HAVE_MUMPS) 3373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3374 #endif 3375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3376 "MatInvertBlockDiagonal_SeqBAIJ", 3377 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3379 "MatStoreValues_SeqBAIJ", 3380 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3381 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3382 "MatRetrieveValues_SeqBAIJ", 3383 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3384 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3385 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3386 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3387 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3388 "MatConvert_SeqBAIJ_SeqAIJ", 3389 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3390 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3391 "MatConvert_SeqBAIJ_SeqSBAIJ", 3392 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3393 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3394 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3395 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3396 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3397 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3398 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3399 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3400 "MatConvert_SeqBAIJ_SeqBSTRM", 3401 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3402 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3403 "MatIsTranspose_SeqBAIJ", 3404 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3405 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3406 PetscFunctionReturn(0); 3407 } 3408 EXTERN_C_END 3409 3410 #undef __FUNCT__ 3411 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3412 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3413 { 3414 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3415 PetscErrorCode ierr; 3416 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3417 3418 PetscFunctionBegin; 3419 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3420 3421 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3422 c->imax = a->imax; 3423 c->ilen = a->ilen; 3424 c->free_imax_ilen = PETSC_FALSE; 3425 } else { 3426 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3427 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3428 for (i=0; i<mbs; i++) { 3429 c->imax[i] = a->imax[i]; 3430 c->ilen[i] = a->ilen[i]; 3431 } 3432 c->free_imax_ilen = PETSC_TRUE; 3433 } 3434 3435 /* allocate the matrix space */ 3436 if (mallocmatspace){ 3437 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3438 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3439 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3440 c->singlemalloc = PETSC_FALSE; 3441 c->free_ij = PETSC_FALSE; 3442 c->i = a->i; 3443 c->j = a->j; 3444 c->parent = A; 3445 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3446 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3447 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3448 } else { 3449 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3450 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3451 c->singlemalloc = PETSC_TRUE; 3452 c->free_ij = PETSC_TRUE; 3453 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3454 if (mbs > 0) { 3455 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3456 if (cpvalues == MAT_COPY_VALUES) { 3457 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3458 } else { 3459 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3460 } 3461 } 3462 } 3463 } 3464 3465 c->roworiented = a->roworiented; 3466 c->nonew = a->nonew; 3467 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3468 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3469 c->bs2 = a->bs2; 3470 c->mbs = a->mbs; 3471 c->nbs = a->nbs; 3472 3473 if (a->diag) { 3474 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3475 c->diag = a->diag; 3476 c->free_diag = PETSC_FALSE; 3477 } else { 3478 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3479 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3480 for (i=0; i<mbs; i++) { 3481 c->diag[i] = a->diag[i]; 3482 } 3483 c->free_diag = PETSC_TRUE; 3484 } 3485 } else c->diag = 0; 3486 c->nz = a->nz; 3487 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3488 c->solve_work = 0; 3489 c->mult_work = 0; 3490 c->free_a = PETSC_TRUE; 3491 c->free_ij = PETSC_TRUE; 3492 C->preallocated = PETSC_TRUE; 3493 C->assembled = PETSC_TRUE; 3494 3495 c->compressedrow.use = a->compressedrow.use; 3496 c->compressedrow.nrows = a->compressedrow.nrows; 3497 c->compressedrow.check = a->compressedrow.check; 3498 if (a->compressedrow.use){ 3499 i = a->compressedrow.nrows; 3500 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3501 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3502 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3503 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3504 } else { 3505 c->compressedrow.use = PETSC_FALSE; 3506 c->compressedrow.i = PETSC_NULL; 3507 c->compressedrow.rindex = PETSC_NULL; 3508 } 3509 C->same_nonzero = A->same_nonzero; 3510 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3511 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3512 PetscFunctionReturn(0); 3513 } 3514 3515 #undef __FUNCT__ 3516 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3517 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3518 { 3519 PetscErrorCode ierr; 3520 3521 PetscFunctionBegin; 3522 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3523 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3524 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3525 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3526 PetscFunctionReturn(0); 3527 } 3528 3529 #undef __FUNCT__ 3530 #define __FUNCT__ "MatLoad_SeqBAIJ" 3531 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3532 { 3533 Mat_SeqBAIJ *a; 3534 PetscErrorCode ierr; 3535 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3536 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3537 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3538 PetscInt *masked,nmask,tmp,bs2,ishift; 3539 PetscMPIInt size; 3540 int fd; 3541 PetscScalar *aa; 3542 MPI_Comm comm = ((PetscObject)viewer)->comm; 3543 3544 PetscFunctionBegin; 3545 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3546 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3547 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3548 bs2 = bs*bs; 3549 3550 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3551 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3552 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3553 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3554 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3555 M = header[1]; N = header[2]; nz = header[3]; 3556 3557 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3558 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3559 3560 /* 3561 This code adds extra rows to make sure the number of rows is 3562 divisible by the blocksize 3563 */ 3564 mbs = M/bs; 3565 extra_rows = bs - M + bs*(mbs); 3566 if (extra_rows == bs) extra_rows = 0; 3567 else mbs++; 3568 if (extra_rows) { 3569 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3570 } 3571 3572 /* Set global sizes if not already set */ 3573 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3574 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3575 } else { /* Check if the matrix global sizes are correct */ 3576 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3577 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3578 } 3579 3580 /* read in row lengths */ 3581 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3582 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3583 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3584 3585 /* read in column indices */ 3586 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3587 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3588 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3589 3590 /* loop over row lengths determining block row lengths */ 3591 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3592 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3593 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3594 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3595 rowcount = 0; 3596 nzcount = 0; 3597 for (i=0; i<mbs; i++) { 3598 nmask = 0; 3599 for (j=0; j<bs; j++) { 3600 kmax = rowlengths[rowcount]; 3601 for (k=0; k<kmax; k++) { 3602 tmp = jj[nzcount++]/bs; 3603 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3604 } 3605 rowcount++; 3606 } 3607 browlengths[i] += nmask; 3608 /* zero out the mask elements we set */ 3609 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3610 } 3611 3612 /* Do preallocation */ 3613 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3614 a = (Mat_SeqBAIJ*)newmat->data; 3615 3616 /* set matrix "i" values */ 3617 a->i[0] = 0; 3618 for (i=1; i<= mbs; i++) { 3619 a->i[i] = a->i[i-1] + browlengths[i-1]; 3620 a->ilen[i-1] = browlengths[i-1]; 3621 } 3622 a->nz = 0; 3623 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3624 3625 /* read in nonzero values */ 3626 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3627 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3628 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3629 3630 /* set "a" and "j" values into matrix */ 3631 nzcount = 0; jcount = 0; 3632 for (i=0; i<mbs; i++) { 3633 nzcountb = nzcount; 3634 nmask = 0; 3635 for (j=0; j<bs; j++) { 3636 kmax = rowlengths[i*bs+j]; 3637 for (k=0; k<kmax; k++) { 3638 tmp = jj[nzcount++]/bs; 3639 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3640 } 3641 } 3642 /* sort the masked values */ 3643 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3644 3645 /* set "j" values into matrix */ 3646 maskcount = 1; 3647 for (j=0; j<nmask; j++) { 3648 a->j[jcount++] = masked[j]; 3649 mask[masked[j]] = maskcount++; 3650 } 3651 /* set "a" values into matrix */ 3652 ishift = bs2*a->i[i]; 3653 for (j=0; j<bs; j++) { 3654 kmax = rowlengths[i*bs+j]; 3655 for (k=0; k<kmax; k++) { 3656 tmp = jj[nzcountb]/bs ; 3657 block = mask[tmp] - 1; 3658 point = jj[nzcountb] - bs*tmp; 3659 idx = ishift + bs2*block + j + bs*point; 3660 a->a[idx] = (MatScalar)aa[nzcountb++]; 3661 } 3662 } 3663 /* zero out the mask elements we set */ 3664 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3665 } 3666 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3667 3668 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3669 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3670 ierr = PetscFree(aa);CHKERRQ(ierr); 3671 ierr = PetscFree(jj);CHKERRQ(ierr); 3672 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3673 3674 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3675 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3676 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3677 PetscFunctionReturn(0); 3678 } 3679 3680 #undef __FUNCT__ 3681 #define __FUNCT__ "MatCreateSeqBAIJ" 3682 /*@C 3683 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3684 compressed row) format. For good matrix assembly performance the 3685 user should preallocate the matrix storage by setting the parameter nz 3686 (or the array nnz). By setting these parameters accurately, performance 3687 during matrix assembly can be increased by more than a factor of 50. 3688 3689 Collective on MPI_Comm 3690 3691 Input Parameters: 3692 + comm - MPI communicator, set to PETSC_COMM_SELF 3693 . bs - size of block 3694 . m - number of rows 3695 . n - number of columns 3696 . nz - number of nonzero blocks per block row (same for all rows) 3697 - nnz - array containing the number of nonzero blocks in the various block rows 3698 (possibly different for each block row) or PETSC_NULL 3699 3700 Output Parameter: 3701 . A - the matrix 3702 3703 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3704 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3705 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3706 3707 Options Database Keys: 3708 . -mat_no_unroll - uses code that does not unroll the loops in the 3709 block calculations (much slower) 3710 . -mat_block_size - size of the blocks to use 3711 3712 Level: intermediate 3713 3714 Notes: 3715 The number of rows and columns must be divisible by blocksize. 3716 3717 If the nnz parameter is given then the nz parameter is ignored 3718 3719 A nonzero block is any block that as 1 or more nonzeros in it 3720 3721 The block AIJ format is fully compatible with standard Fortran 77 3722 storage. That is, the stored row and column indices can begin at 3723 either one (as in Fortran) or zero. See the users' manual for details. 3724 3725 Specify the preallocated storage with either nz or nnz (not both). 3726 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3727 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3728 matrices. 3729 3730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3731 @*/ 3732 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3733 { 3734 PetscErrorCode ierr; 3735 3736 PetscFunctionBegin; 3737 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3738 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3739 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3740 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3741 PetscFunctionReturn(0); 3742 } 3743 3744 #undef __FUNCT__ 3745 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3746 /*@C 3747 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3748 per row in the matrix. For good matrix assembly performance the 3749 user should preallocate the matrix storage by setting the parameter nz 3750 (or the array nnz). By setting these parameters accurately, performance 3751 during matrix assembly can be increased by more than a factor of 50. 3752 3753 Collective on MPI_Comm 3754 3755 Input Parameters: 3756 + A - the matrix 3757 . bs - size of block 3758 . nz - number of block nonzeros per block row (same for all rows) 3759 - nnz - array containing the number of block nonzeros in the various block rows 3760 (possibly different for each block row) or PETSC_NULL 3761 3762 Options Database Keys: 3763 . -mat_no_unroll - uses code that does not unroll the loops in the 3764 block calculations (much slower) 3765 . -mat_block_size - size of the blocks to use 3766 3767 Level: intermediate 3768 3769 Notes: 3770 If the nnz parameter is given then the nz parameter is ignored 3771 3772 You can call MatGetInfo() to get information on how effective the preallocation was; 3773 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3774 You can also run with the option -info and look for messages with the string 3775 malloc in them to see if additional memory allocation was needed. 3776 3777 The block AIJ format is fully compatible with standard Fortran 77 3778 storage. That is, the stored row and column indices can begin at 3779 either one (as in Fortran) or zero. See the users' manual for details. 3780 3781 Specify the preallocated storage with either nz or nnz (not both). 3782 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3783 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3784 3785 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3786 @*/ 3787 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3788 { 3789 PetscErrorCode ierr; 3790 3791 PetscFunctionBegin; 3792 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3793 PetscFunctionReturn(0); 3794 } 3795 3796 #undef __FUNCT__ 3797 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3798 /*@C 3799 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3800 (the default sequential PETSc format). 3801 3802 Collective on MPI_Comm 3803 3804 Input Parameters: 3805 + A - the matrix 3806 . i - the indices into j for the start of each local row (starts with zero) 3807 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3808 - v - optional values in the matrix 3809 3810 Level: developer 3811 3812 .keywords: matrix, aij, compressed row, sparse 3813 3814 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3815 @*/ 3816 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3817 { 3818 PetscErrorCode ierr; 3819 3820 PetscFunctionBegin; 3821 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3822 PetscFunctionReturn(0); 3823 } 3824 3825 3826 #undef __FUNCT__ 3827 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3828 /*@ 3829 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3830 3831 Collective on MPI_Comm 3832 3833 Input Parameters: 3834 + comm - must be an MPI communicator of size 1 3835 . bs - size of block 3836 . m - number of rows 3837 . n - number of columns 3838 . i - row indices 3839 . j - column indices 3840 - a - matrix values 3841 3842 Output Parameter: 3843 . mat - the matrix 3844 3845 Level: advanced 3846 3847 Notes: 3848 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3849 once the matrix is destroyed 3850 3851 You cannot set new nonzero locations into this matrix, that will generate an error. 3852 3853 The i and j indices are 0 based 3854 3855 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3856 3857 3858 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3859 3860 @*/ 3861 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3862 { 3863 PetscErrorCode ierr; 3864 PetscInt ii; 3865 Mat_SeqBAIJ *baij; 3866 3867 PetscFunctionBegin; 3868 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3869 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3870 3871 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3872 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3873 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3874 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3875 baij = (Mat_SeqBAIJ*)(*mat)->data; 3876 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3877 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3878 3879 baij->i = i; 3880 baij->j = j; 3881 baij->a = a; 3882 baij->singlemalloc = PETSC_FALSE; 3883 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3884 baij->free_a = PETSC_FALSE; 3885 baij->free_ij = PETSC_FALSE; 3886 3887 for (ii=0; ii<m; ii++) { 3888 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3889 #if defined(PETSC_USE_DEBUG) 3890 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3891 #endif 3892 } 3893 #if defined(PETSC_USE_DEBUG) 3894 for (ii=0; ii<baij->i[m]; ii++) { 3895 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3896 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3897 } 3898 #endif 3899 3900 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3901 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3902 PetscFunctionReturn(0); 3903 } 3904