1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 211 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 212 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 #undef __FUNCT__ 217 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 218 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 219 { 220 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 221 PetscScalar *x,x1,x2,s1,s2; 222 const PetscScalar *b; 223 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 224 PetscErrorCode ierr; 225 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 226 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 227 228 PetscFunctionBegin; 229 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 235 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 246 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 248 i2 = 2; 249 idiag += 4; 250 for (i=1; i<m; i++) { 251 v = aa + 4*ai[i]; 252 vi = aj + ai[i]; 253 nz = diag[i] - ai[i]; 254 s1 = b[i2]; s2 = b[i2+1]; 255 for (j=0; j<nz; j++) { 256 idx = 2*vi[j]; 257 it = 4*j; 258 x1 = x[idx]; x2 = x[1+idx]; 259 s1 -= v[it]*x1 + v[it+2]*x2; 260 s2 -= v[it+1]*x1 + v[it+3]*x2; 261 } 262 x[i2] = idiag[0]*s1 + idiag[2]*s2; 263 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 264 idiag += 4; 265 i2 += 2; 266 } 267 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 268 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 269 } 270 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 271 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 272 i2 = 0; 273 mdiag = a->idiag+4*a->mbs; 274 for (i=0; i<m; i++) { 275 x1 = x[i2]; x2 = x[i2+1]; 276 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 277 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 278 mdiag += 4; 279 i2 += 2; 280 } 281 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 282 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 283 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 284 } 285 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 286 idiag = a->idiag+4*a->mbs - 4; 287 i2 = 2*m - 2; 288 x1 = x[i2]; x2 = x[i2+1]; 289 x[i2] = idiag[0]*x1 + idiag[2]*x2; 290 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 291 idiag -= 4; 292 i2 -= 2; 293 for (i=m-2; i>=0; i--) { 294 v = aa + 4*(diag[i]+1); 295 vi = aj + diag[i] + 1; 296 nz = ai[i+1] - diag[i] - 1; 297 s1 = x[i2]; s2 = x[i2+1]; 298 for (j=0; j<nz; j++) { 299 idx = 2*vi[j]; 300 it = 4*j; 301 x1 = x[idx]; x2 = x[1+idx]; 302 s1 -= v[it]*x1 + v[it+2]*x2; 303 s2 -= v[it+1]*x1 + v[it+3]*x2; 304 } 305 x[i2] = idiag[0]*s1 + idiag[2]*s2; 306 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 307 idiag -= 4; 308 i2 -= 2; 309 } 310 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 311 } 312 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 320 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 420 } 421 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 422 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 429 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 432 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 433 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 434 const PetscScalar *b; 435 PetscErrorCode ierr; 436 PetscInt m = a->mbs,i,i2,nz,idx; 437 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 438 439 PetscFunctionBegin; 440 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 441 its = its*lits; 442 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 443 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 444 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 445 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 446 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 447 448 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 449 450 diag = a->diag; 451 idiag = a->idiag; 452 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 453 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 454 455 if (flag & SOR_ZERO_INITIAL_GUESS) { 456 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 457 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 458 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 459 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 460 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 461 i2 = 4; 462 idiag += 16; 463 for (i=1; i<m; i++) { 464 v = aa + 16*ai[i]; 465 vi = aj + ai[i]; 466 nz = diag[i] - ai[i]; 467 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 468 while (nz--) { 469 idx = 4*(*vi++); 470 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 471 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 472 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 473 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 474 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 475 v += 16; 476 } 477 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 478 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 479 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 480 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 481 idiag += 16; 482 i2 += 4; 483 } 484 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 485 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 486 } 487 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 488 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 489 i2 = 0; 490 mdiag = a->idiag+16*a->mbs; 491 for (i=0; i<m; i++) { 492 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 493 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 494 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 495 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 496 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 497 mdiag += 16; 498 i2 += 4; 499 } 500 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 501 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 502 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 503 } 504 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 505 idiag = a->idiag+16*a->mbs - 16; 506 i2 = 4*m - 4; 507 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 508 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 509 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 510 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 511 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 512 idiag -= 16; 513 i2 -= 4; 514 for (i=m-2; i>=0; i--) { 515 v = aa + 16*(diag[i]+1); 516 vi = aj + diag[i] + 1; 517 nz = ai[i+1] - diag[i] - 1; 518 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 519 while (nz--) { 520 idx = 4*(*vi++); 521 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 522 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 523 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 524 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 525 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 526 v += 16; 527 } 528 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 529 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 530 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 531 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 532 idiag -= 16; 533 i2 -= 4; 534 } 535 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 536 } 537 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 538 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 539 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 540 PetscFunctionReturn(0); 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 545 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 546 { 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 549 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 550 const PetscScalar *b; 551 PetscErrorCode ierr; 552 PetscInt m = a->mbs,i,i2,nz,idx; 553 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 554 555 PetscFunctionBegin; 556 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 557 its = its*lits; 558 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 559 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 560 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 561 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 562 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 563 564 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 565 566 diag = a->diag; 567 idiag = a->idiag; 568 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 569 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 570 571 if (flag & SOR_ZERO_INITIAL_GUESS) { 572 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 573 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 574 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 575 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 576 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 577 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 578 i2 = 5; 579 idiag += 25; 580 for (i=1; i<m; i++) { 581 v = aa + 25*ai[i]; 582 vi = aj + ai[i]; 583 nz = diag[i] - ai[i]; 584 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 585 while (nz--) { 586 idx = 5*(*vi++); 587 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 588 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 589 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 590 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 591 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 592 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 593 v += 25; 594 } 595 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 596 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 597 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 598 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 599 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 600 idiag += 25; 601 i2 += 5; 602 } 603 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 604 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 605 } 606 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 607 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 608 i2 = 0; 609 mdiag = a->idiag+25*a->mbs; 610 for (i=0; i<m; i++) { 611 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 612 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 613 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 614 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 615 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 616 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 617 mdiag += 25; 618 i2 += 5; 619 } 620 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 621 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 622 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 623 } 624 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 625 idiag = a->idiag+25*a->mbs - 25; 626 i2 = 5*m - 5; 627 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 628 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 629 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 630 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 631 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 632 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 633 idiag -= 25; 634 i2 -= 5; 635 for (i=m-2; i>=0; i--) { 636 v = aa + 25*(diag[i]+1); 637 vi = aj + diag[i] + 1; 638 nz = ai[i+1] - diag[i] - 1; 639 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 640 while (nz--) { 641 idx = 5*(*vi++); 642 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 643 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 644 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 645 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 646 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 647 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 648 v += 25; 649 } 650 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 651 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 652 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 653 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 654 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 655 idiag -= 25; 656 i2 -= 5; 657 } 658 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 659 } 660 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 661 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 662 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 668 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 669 { 670 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 671 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 672 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 673 const PetscScalar *b; 674 PetscErrorCode ierr; 675 PetscInt m = a->mbs,i,i2,nz,idx; 676 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 677 678 PetscFunctionBegin; 679 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 680 its = its*lits; 681 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 682 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 683 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 684 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 685 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 686 687 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 688 689 diag = a->diag; 690 idiag = a->idiag; 691 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 692 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 693 694 if (flag & SOR_ZERO_INITIAL_GUESS) { 695 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 696 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 697 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 698 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 699 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 700 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 701 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 702 i2 = 6; 703 idiag += 36; 704 for (i=1; i<m; i++) { 705 v = aa + 36*ai[i]; 706 vi = aj + ai[i]; 707 nz = diag[i] - ai[i]; 708 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 709 while (nz--) { 710 idx = 6*(*vi++); 711 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 712 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 713 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 714 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 715 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 716 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 717 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 718 v += 36; 719 } 720 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 721 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 722 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 723 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 724 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 725 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 726 idiag += 36; 727 i2 += 6; 728 } 729 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 730 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 731 } 732 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 733 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 734 i2 = 0; 735 mdiag = a->idiag+36*a->mbs; 736 for (i=0; i<m; i++) { 737 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 738 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 739 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 740 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 741 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 742 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 743 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 744 mdiag += 36; 745 i2 += 6; 746 } 747 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 748 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 749 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 750 } 751 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 752 idiag = a->idiag+36*a->mbs - 36; 753 i2 = 6*m - 6; 754 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 755 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 756 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 757 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 758 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 759 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 760 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 761 idiag -= 36; 762 i2 -= 6; 763 for (i=m-2; i>=0; i--) { 764 v = aa + 36*(diag[i]+1); 765 vi = aj + diag[i] + 1; 766 nz = ai[i+1] - diag[i] - 1; 767 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 768 while (nz--) { 769 idx = 6*(*vi++); 770 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 771 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 772 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 773 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 774 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 775 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 776 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 777 v += 36; 778 } 779 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 780 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 781 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 782 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 783 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 784 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 785 idiag -= 36; 786 i2 -= 6; 787 } 788 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 791 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 792 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 798 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 799 { 800 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 801 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 802 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 803 const PetscScalar *b; 804 PetscErrorCode ierr; 805 PetscInt m = a->mbs,i,i2,nz,idx; 806 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 807 808 PetscFunctionBegin; 809 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 810 its = its*lits; 811 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 812 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 813 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 814 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 815 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 816 817 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 818 819 diag = a->diag; 820 idiag = a->idiag; 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 823 824 if (flag & SOR_ZERO_INITIAL_GUESS) { 825 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 826 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 827 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 828 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 829 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 830 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 831 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 832 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 833 i2 = 7; 834 idiag += 49; 835 for (i=1; i<m; i++) { 836 v = aa + 49*ai[i]; 837 vi = aj + ai[i]; 838 nz = diag[i] - ai[i]; 839 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 843 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 844 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 845 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 846 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 847 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 848 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 849 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 850 v += 49; 851 } 852 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 853 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 854 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 855 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 856 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 857 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 858 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 859 idiag += 49; 860 i2 += 7; 861 } 862 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 863 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 864 } 865 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 866 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 867 i2 = 0; 868 mdiag = a->idiag+49*a->mbs; 869 for (i=0; i<m; i++) { 870 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 871 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 872 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 873 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 874 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 875 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 876 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 877 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 878 mdiag += 49; 879 i2 += 7; 880 } 881 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 882 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 883 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 884 } 885 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 886 idiag = a->idiag+49*a->mbs - 49; 887 i2 = 7*m - 7; 888 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 889 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 890 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 891 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 892 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 893 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 894 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 895 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 896 idiag -= 49; 897 i2 -= 7; 898 for (i=m-2; i>=0; i--) { 899 v = aa + 49*(diag[i]+1); 900 vi = aj + diag[i] + 1; 901 nz = ai[i+1] - diag[i] - 1; 902 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 903 while (nz--) { 904 idx = 7*(*vi++); 905 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 906 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 907 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 908 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 909 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 910 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 911 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 912 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 913 v += 49; 914 } 915 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 916 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 917 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 918 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 919 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 920 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 921 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 922 idiag -= 49; 923 i2 -= 7; 924 } 925 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 926 } 927 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 935 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 936 { 937 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 938 PetscScalar *x,*work,*w,*workt; 939 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 940 const PetscScalar *b; 941 PetscErrorCode ierr; 942 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 943 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 944 945 PetscFunctionBegin; 946 its = its*lits; 947 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 948 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 949 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 950 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 951 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 952 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 953 954 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 955 956 diag = a->diag; 957 idiag = a->idiag; 958 if (!a->mult_work) { 959 k = PetscMax(A->rmap->n,A->cmap->n); 960 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 961 } 962 work = a->mult_work; 963 if (!a->sor_work) { 964 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 965 } 966 w = a->sor_work; 967 968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 969 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 970 971 if (flag & SOR_ZERO_INITIAL_GUESS) { 972 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 973 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 974 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 975 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 976 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 977 i2 = bs; 978 idiag += bs2; 979 for (i=1; i<m; i++) { 980 v = aa + bs2*ai[i]; 981 vi = aj + ai[i]; 982 nz = diag[i] - ai[i]; 983 984 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 985 /* copy all rows of x that are needed into contiguous space */ 986 workt = work; 987 for (j=0; j<nz; j++) { 988 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 989 workt += bs; 990 } 991 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 992 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 993 while (nz--) { 994 idx = N*(*vi++); 995 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 996 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 997 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 998 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 999 v += N2; 1000 } */ 1001 1002 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1003 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1004 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1005 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1006 1007 idiag += bs2; 1008 i2 += bs; 1009 } 1010 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1011 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1012 } 1013 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1014 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1015 i2 = 0; 1016 mdiag = a->idiag+bs2*a->mbs; 1017 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1018 for (i=0; i<m; i++) { 1019 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1020 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1021 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1022 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1023 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1024 1025 mdiag += bs2; 1026 i2 += bs; 1027 } 1028 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1029 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1030 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1031 } 1032 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1033 idiag = a->idiag+bs2*a->mbs - bs2; 1034 i2 = bs*m - bs; 1035 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1036 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1037 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1038 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1039 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1040 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1041 idiag -= bs2; 1042 i2 -= bs; 1043 for (i=m-2; i>=0; i--) { 1044 v = aa + bs2*(diag[i]+1); 1045 vi = aj + diag[i] + 1; 1046 nz = ai[i+1] - diag[i] - 1; 1047 1048 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1049 /* copy all rows of x that are needed into contiguous space */ 1050 workt = work; 1051 for (j=0; j<nz; j++) { 1052 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 workt += bs; 1054 } 1055 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1056 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1057 while (nz--) { 1058 idx = N*(*vi++); 1059 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1060 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1061 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1062 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1063 v += N2; 1064 } */ 1065 1066 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1067 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1068 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1069 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1070 idiag -= bs2; 1071 i2 -= bs; 1072 } 1073 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1074 } 1075 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1076 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1077 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 /* 1082 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1083 */ 1084 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1085 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1086 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1087 #define matsetvaluesblocked4_ matsetvaluesblocked4 1088 #endif 1089 1090 EXTERN_C_BEGIN 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "matsetvaluesblocked4_" 1093 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1094 { 1095 Mat A = *AA; 1096 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1097 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1098 PetscInt *ai=a->i,*ailen=a->ilen; 1099 PetscInt *aj=a->j,stepval,lastcol = -1; 1100 const PetscScalar *value = v; 1101 MatScalar *ap,*aa = a->a,*bap; 1102 1103 PetscFunctionBegin; 1104 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1105 stepval = (n-1)*4; 1106 for (k=0; k<m; k++) { /* loop over added rows */ 1107 row = im[k]; 1108 rp = aj + ai[row]; 1109 ap = aa + 16*ai[row]; 1110 nrow = ailen[row]; 1111 low = 0; 1112 high = nrow; 1113 for (l=0; l<n; l++) { /* loop over added columns */ 1114 col = in[l]; 1115 if (col <= lastcol) low = 0; 1116 else high = nrow; 1117 lastcol = col; 1118 value = v + k*(stepval+4 + l)*4; 1119 while (high-low > 7) { 1120 t = (low+high)/2; 1121 if (rp[t] > col) high = t; 1122 else low = t; 1123 } 1124 for (i=low; i<high; i++) { 1125 if (rp[i] > col) break; 1126 if (rp[i] == col) { 1127 bap = ap + 16*i; 1128 for (ii=0; ii<4; ii++,value+=stepval) { 1129 for (jj=ii; jj<16; jj+=4) { 1130 bap[jj] += *value++; 1131 } 1132 } 1133 goto noinsert2; 1134 } 1135 } 1136 N = nrow++ - 1; 1137 high++; /* added new column index thus must search to one higher than before */ 1138 /* shift up all the later entries in this row */ 1139 for (ii=N; ii>=i; ii--) { 1140 rp[ii+1] = rp[ii]; 1141 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1142 } 1143 if (N >= i) { 1144 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1145 } 1146 rp[i] = col; 1147 bap = ap + 16*i; 1148 for (ii=0; ii<4; ii++,value+=stepval) { 1149 for (jj=ii; jj<16; jj+=4) { 1150 bap[jj] = *value++; 1151 } 1152 } 1153 noinsert2:; 1154 low = i; 1155 } 1156 ailen[row] = nrow; 1157 } 1158 PetscFunctionReturnVoid(); 1159 } 1160 EXTERN_C_END 1161 1162 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1163 #define matsetvalues4_ MATSETVALUES4 1164 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1165 #define matsetvalues4_ matsetvalues4 1166 #endif 1167 1168 EXTERN_C_BEGIN 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatSetValues4_" 1171 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1172 { 1173 Mat A = *AA; 1174 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1175 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1176 PetscInt *ai=a->i,*ailen=a->ilen; 1177 PetscInt *aj=a->j,brow,bcol; 1178 PetscInt ridx,cidx,lastcol = -1; 1179 MatScalar *ap,value,*aa=a->a,*bap; 1180 1181 PetscFunctionBegin; 1182 for (k=0; k<m; k++) { /* loop over added rows */ 1183 row = im[k]; brow = row/4; 1184 rp = aj + ai[brow]; 1185 ap = aa + 16*ai[brow]; 1186 nrow = ailen[brow]; 1187 low = 0; 1188 high = nrow; 1189 for (l=0; l<n; l++) { /* loop over added columns */ 1190 col = in[l]; bcol = col/4; 1191 ridx = row % 4; cidx = col % 4; 1192 value = v[l + k*n]; 1193 if (col <= lastcol) low = 0; 1194 else high = nrow; 1195 lastcol = col; 1196 while (high-low > 7) { 1197 t = (low+high)/2; 1198 if (rp[t] > bcol) high = t; 1199 else low = t; 1200 } 1201 for (i=low; i<high; i++) { 1202 if (rp[i] > bcol) break; 1203 if (rp[i] == bcol) { 1204 bap = ap + 16*i + 4*cidx + ridx; 1205 *bap += value; 1206 goto noinsert1; 1207 } 1208 } 1209 N = nrow++ - 1; 1210 high++; /* added new column thus must search to one higher than before */ 1211 /* shift up all the later entries in this row */ 1212 for (ii=N; ii>=i; ii--) { 1213 rp[ii+1] = rp[ii]; 1214 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1215 } 1216 if (N>=i) { 1217 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1218 } 1219 rp[i] = bcol; 1220 ap[16*i + 4*cidx + ridx] = value; 1221 noinsert1:; 1222 low = i; 1223 } 1224 ailen[brow] = nrow; 1225 } 1226 PetscFunctionReturnVoid(); 1227 } 1228 EXTERN_C_END 1229 1230 /* 1231 Checks for missing diagonals 1232 */ 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1235 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1236 { 1237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1238 PetscErrorCode ierr; 1239 PetscInt *diag,*jj = a->j,i; 1240 1241 PetscFunctionBegin; 1242 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1243 *missing = PETSC_FALSE; 1244 if (A->rmap->n > 0 && !jj) { 1245 *missing = PETSC_TRUE; 1246 if (d) *d = 0; 1247 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1248 } else { 1249 diag = a->diag; 1250 for (i=0; i<a->mbs; i++) { 1251 if (jj[diag[i]] != i) { 1252 *missing = PETSC_TRUE; 1253 if (d) *d = i; 1254 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1255 break; 1256 } 1257 } 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1264 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1265 { 1266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1267 PetscErrorCode ierr; 1268 PetscInt i,j,m = a->mbs; 1269 1270 PetscFunctionBegin; 1271 if (!a->diag) { 1272 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1273 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1274 a->free_diag = PETSC_TRUE; 1275 } 1276 for (i=0; i<m; i++) { 1277 a->diag[i] = a->i[i+1]; 1278 for (j=a->i[i]; j<a->i[i+1]; j++) { 1279 if (a->j[j] == i) { 1280 a->diag[i] = j; 1281 break; 1282 } 1283 } 1284 } 1285 PetscFunctionReturn(0); 1286 } 1287 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1291 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1292 { 1293 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1294 PetscErrorCode ierr; 1295 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1296 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1297 1298 PetscFunctionBegin; 1299 *nn = n; 1300 if (!ia) PetscFunctionReturn(0); 1301 if (symmetric) { 1302 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1303 nz = tia[n]; 1304 } else { 1305 tia = a->i; tja = a->j; 1306 } 1307 1308 if (!blockcompressed && bs > 1) { 1309 (*nn) *= bs; 1310 /* malloc & create the natural set of indices */ 1311 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1312 if (n) { 1313 (*ia)[0] = 0; 1314 for (j=1; j<bs; j++) { 1315 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1316 } 1317 } 1318 1319 for (i=1; i<n; i++) { 1320 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1321 for (j=1; j<bs; j++) { 1322 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1323 } 1324 } 1325 if (n) { 1326 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1327 } 1328 1329 if (inja) { 1330 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1331 cnt = 0; 1332 for (i=0; i<n; i++) { 1333 for (j=0; j<bs; j++) { 1334 for (k=tia[i]; k<tia[i+1]; k++) { 1335 for (l=0; l<bs; l++) { 1336 (*ja)[cnt++] = bs*tja[k] + l; 1337 } 1338 } 1339 } 1340 } 1341 } 1342 1343 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1344 ierr = PetscFree(tia);CHKERRQ(ierr); 1345 ierr = PetscFree(tja);CHKERRQ(ierr); 1346 } 1347 } else if (oshift == 1) { 1348 if (symmetric) { 1349 nz = tia[A->rmap->n/bs]; 1350 /* add 1 to i and j indices */ 1351 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1352 *ia = tia; 1353 if (ja) { 1354 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1355 *ja = tja; 1356 } 1357 } else { 1358 nz = a->i[A->rmap->n/bs]; 1359 /* malloc space and add 1 to i and j indices */ 1360 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1361 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1362 if (ja) { 1363 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1364 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1365 } 1366 } 1367 } else { 1368 *ia = tia; 1369 if (ja) *ja = tja; 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1376 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1377 { 1378 PetscErrorCode ierr; 1379 1380 PetscFunctionBegin; 1381 if (!ia) PetscFunctionReturn(0); 1382 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1383 ierr = PetscFree(*ia);CHKERRQ(ierr); 1384 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1385 } 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1391 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1392 { 1393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 #if defined(PETSC_USE_LOG) 1398 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1399 #endif 1400 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1401 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1402 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1403 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1404 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1405 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1406 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1407 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1408 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1409 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1410 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1411 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1412 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1413 1414 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1415 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1416 ierr = PetscFree(A->data);CHKERRQ(ierr); 1417 1418 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1419 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1421 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1422 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1423 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1424 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1425 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1426 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1427 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1428 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1434 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1435 { 1436 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1437 PetscErrorCode ierr; 1438 1439 PetscFunctionBegin; 1440 switch (op) { 1441 case MAT_ROW_ORIENTED: 1442 a->roworiented = flg; 1443 break; 1444 case MAT_KEEP_NONZERO_PATTERN: 1445 a->keepnonzeropattern = flg; 1446 break; 1447 case MAT_NEW_NONZERO_LOCATIONS: 1448 a->nonew = (flg ? 0 : 1); 1449 break; 1450 case MAT_NEW_NONZERO_LOCATION_ERR: 1451 a->nonew = (flg ? -1 : 0); 1452 break; 1453 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1454 a->nonew = (flg ? -2 : 0); 1455 break; 1456 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1457 a->nounused = (flg ? -1 : 0); 1458 break; 1459 case MAT_CHECK_COMPRESSED_ROW: 1460 a->compressedrow.check = flg; 1461 break; 1462 case MAT_NEW_DIAGONALS: 1463 case MAT_IGNORE_OFF_PROC_ENTRIES: 1464 case MAT_USE_HASH_TABLE: 1465 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1466 break; 1467 case MAT_SPD: 1468 case MAT_SYMMETRIC: 1469 case MAT_STRUCTURALLY_SYMMETRIC: 1470 case MAT_HERMITIAN: 1471 case MAT_SYMMETRY_ETERNAL: 1472 /* These options are handled directly by MatSetOption() */ 1473 break; 1474 default: 1475 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1482 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1483 { 1484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1487 MatScalar *aa,*aa_i; 1488 PetscScalar *v_i; 1489 1490 PetscFunctionBegin; 1491 bs = A->rmap->bs; 1492 ai = a->i; 1493 aj = a->j; 1494 aa = a->a; 1495 bs2 = a->bs2; 1496 1497 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1498 1499 bn = row/bs; /* Block number */ 1500 bp = row % bs; /* Block Position */ 1501 M = ai[bn+1] - ai[bn]; 1502 *nz = bs*M; 1503 1504 if (v) { 1505 *v = 0; 1506 if (*nz) { 1507 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1508 for (i=0; i<M; i++) { /* for each block in the block row */ 1509 v_i = *v + i*bs; 1510 aa_i = aa + bs2*(ai[bn] + i); 1511 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1512 } 1513 } 1514 } 1515 1516 if (idx) { 1517 *idx = 0; 1518 if (*nz) { 1519 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1520 for (i=0; i<M; i++) { /* for each block in the block row */ 1521 idx_i = *idx + i*bs; 1522 itmp = bs*aj[ai[bn] + i]; 1523 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1524 } 1525 } 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1532 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1533 { 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1538 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1539 PetscFunctionReturn(0); 1540 } 1541 1542 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1546 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1547 { 1548 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1549 Mat C; 1550 PetscErrorCode ierr; 1551 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1552 PetscInt *rows,*cols,bs2=a->bs2; 1553 MatScalar *array; 1554 1555 PetscFunctionBegin; 1556 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1557 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1558 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1559 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1560 1561 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1562 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1563 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1564 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1565 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1566 ierr = PetscFree(col);CHKERRQ(ierr); 1567 } else { 1568 C = *B; 1569 } 1570 1571 array = a->a; 1572 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1573 for (i=0; i<mbs; i++) { 1574 cols[0] = i*bs; 1575 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1576 len = ai[i+1] - ai[i]; 1577 for (j=0; j<len; j++) { 1578 rows[0] = (*aj++)*bs; 1579 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1580 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1581 array += bs2; 1582 } 1583 } 1584 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1585 1586 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1587 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1588 1589 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1590 *B = C; 1591 } else { 1592 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1593 } 1594 PetscFunctionReturn(0); 1595 } 1596 1597 EXTERN_C_BEGIN 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1600 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1601 { 1602 PetscErrorCode ierr; 1603 Mat Btrans; 1604 1605 PetscFunctionBegin; 1606 *f = PETSC_FALSE; 1607 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1608 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1609 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 EXTERN_C_END 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1616 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1617 { 1618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1619 PetscErrorCode ierr; 1620 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1621 int fd; 1622 PetscScalar *aa; 1623 FILE *file; 1624 1625 PetscFunctionBegin; 1626 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1627 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1628 col_lens[0] = MAT_FILE_CLASSID; 1629 1630 col_lens[1] = A->rmap->N; 1631 col_lens[2] = A->cmap->n; 1632 col_lens[3] = a->nz*bs2; 1633 1634 /* store lengths of each row and write (including header) to file */ 1635 count = 0; 1636 for (i=0; i<a->mbs; i++) { 1637 for (j=0; j<bs; j++) { 1638 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1639 } 1640 } 1641 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1642 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1643 1644 /* store column indices (zero start index) */ 1645 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1646 count = 0; 1647 for (i=0; i<a->mbs; i++) { 1648 for (j=0; j<bs; j++) { 1649 for (k=a->i[i]; k<a->i[i+1]; k++) { 1650 for (l=0; l<bs; l++) { 1651 jj[count++] = bs*a->j[k] + l; 1652 } 1653 } 1654 } 1655 } 1656 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1657 ierr = PetscFree(jj);CHKERRQ(ierr); 1658 1659 /* store nonzero values */ 1660 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1661 count = 0; 1662 for (i=0; i<a->mbs; i++) { 1663 for (j=0; j<bs; j++) { 1664 for (k=a->i[i]; k<a->i[i+1]; k++) { 1665 for (l=0; l<bs; l++) { 1666 aa[count++] = a->a[bs2*k + l*bs + j]; 1667 } 1668 } 1669 } 1670 } 1671 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1672 ierr = PetscFree(aa);CHKERRQ(ierr); 1673 1674 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1675 if (file) { 1676 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1683 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1684 { 1685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1686 PetscErrorCode ierr; 1687 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1688 PetscViewerFormat format; 1689 1690 PetscFunctionBegin; 1691 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1692 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1693 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1694 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1695 Mat aij; 1696 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1697 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1698 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1699 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1700 PetscFunctionReturn(0); 1701 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1702 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1703 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1704 for (i=0; i<a->mbs; i++) { 1705 for (j=0; j<bs; j++) { 1706 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1707 for (k=a->i[i]; k<a->i[i+1]; k++) { 1708 for (l=0; l<bs; l++) { 1709 #if defined(PETSC_USE_COMPLEX) 1710 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1711 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1712 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1713 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1714 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1715 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1716 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1717 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1718 } 1719 #else 1720 if (a->a[bs2*k + l*bs + j] != 0.0) { 1721 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1722 } 1723 #endif 1724 } 1725 } 1726 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1727 } 1728 } 1729 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1730 } else { 1731 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1732 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1733 for (i=0; i<a->mbs; i++) { 1734 for (j=0; j<bs; j++) { 1735 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1736 for (k=a->i[i]; k<a->i[i+1]; k++) { 1737 for (l=0; l<bs; l++) { 1738 #if defined(PETSC_USE_COMPLEX) 1739 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1740 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1741 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1742 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1743 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1744 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1745 } else { 1746 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1747 } 1748 #else 1749 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1750 #endif 1751 } 1752 } 1753 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1754 } 1755 } 1756 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1757 } 1758 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1764 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1765 { 1766 Mat A = (Mat) Aa; 1767 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1768 PetscErrorCode ierr; 1769 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1770 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1771 MatScalar *aa; 1772 PetscViewer viewer; 1773 PetscViewerFormat format; 1774 1775 PetscFunctionBegin; 1776 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1777 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1778 1779 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1780 1781 /* loop over matrix elements drawing boxes */ 1782 1783 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1784 color = PETSC_DRAW_BLUE; 1785 for (i=0,row=0; i<mbs; i++,row+=bs) { 1786 for (j=a->i[i]; j<a->i[i+1]; j++) { 1787 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1788 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1789 aa = a->a + j*bs2; 1790 for (k=0; k<bs; k++) { 1791 for (l=0; l<bs; l++) { 1792 if (PetscRealPart(*aa++) >= 0.) continue; 1793 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1794 } 1795 } 1796 } 1797 } 1798 color = PETSC_DRAW_CYAN; 1799 for (i=0,row=0; i<mbs; i++,row+=bs) { 1800 for (j=a->i[i]; j<a->i[i+1]; j++) { 1801 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1802 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1803 aa = a->a + j*bs2; 1804 for (k=0; k<bs; k++) { 1805 for (l=0; l<bs; l++) { 1806 if (PetscRealPart(*aa++) != 0.) continue; 1807 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1808 } 1809 } 1810 } 1811 } 1812 color = PETSC_DRAW_RED; 1813 for (i=0,row=0; i<mbs; i++,row+=bs) { 1814 for (j=a->i[i]; j<a->i[i+1]; j++) { 1815 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1816 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1817 aa = a->a + j*bs2; 1818 for (k=0; k<bs; k++) { 1819 for (l=0; l<bs; l++) { 1820 if (PetscRealPart(*aa++) <= 0.) continue; 1821 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1822 } 1823 } 1824 } 1825 } 1826 } else { 1827 /* use contour shading to indicate magnitude of values */ 1828 /* first determine max of all nonzero values */ 1829 PetscDraw popup; 1830 PetscReal scale,maxv = 0.0; 1831 1832 for (i=0; i<a->nz*a->bs2; i++) { 1833 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1834 } 1835 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1836 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1837 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1838 for (i=0,row=0; i<mbs; i++,row+=bs) { 1839 for (j=a->i[i]; j<a->i[i+1]; j++) { 1840 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1841 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1842 aa = a->a + j*bs2; 1843 for (k=0; k<bs; k++) { 1844 for (l=0; l<bs; l++) { 1845 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1846 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1847 } 1848 } 1849 } 1850 } 1851 } 1852 PetscFunctionReturn(0); 1853 } 1854 1855 #undef __FUNCT__ 1856 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1857 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1858 { 1859 PetscErrorCode ierr; 1860 PetscReal xl,yl,xr,yr,w,h; 1861 PetscDraw draw; 1862 PetscBool isnull; 1863 1864 PetscFunctionBegin; 1865 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1866 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1867 1868 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1869 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1870 xr += w; yr += h; xl = -w; yl = -h; 1871 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1872 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1873 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatView_SeqBAIJ" 1879 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1880 { 1881 PetscErrorCode ierr; 1882 PetscBool iascii,isbinary,isdraw; 1883 1884 PetscFunctionBegin; 1885 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1886 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1887 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1888 if (iascii) { 1889 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1890 } else if (isbinary) { 1891 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1892 } else if (isdraw) { 1893 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1894 } else { 1895 Mat B; 1896 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1897 ierr = MatView(B,viewer);CHKERRQ(ierr); 1898 ierr = MatDestroy(&B);CHKERRQ(ierr); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1906 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1907 { 1908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1909 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1910 PetscInt *ai = a->i,*ailen = a->ilen; 1911 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1912 MatScalar *ap,*aa = a->a; 1913 1914 PetscFunctionBegin; 1915 for (k=0; k<m; k++) { /* loop over rows */ 1916 row = im[k]; brow = row/bs; 1917 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1918 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1919 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1920 nrow = ailen[brow]; 1921 for (l=0; l<n; l++) { /* loop over columns */ 1922 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1923 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1924 col = in[l] ; 1925 bcol = col/bs; 1926 cidx = col%bs; 1927 ridx = row%bs; 1928 high = nrow; 1929 low = 0; /* assume unsorted */ 1930 while (high-low > 5) { 1931 t = (low+high)/2; 1932 if (rp[t] > bcol) high = t; 1933 else low = t; 1934 } 1935 for (i=low; i<high; i++) { 1936 if (rp[i] > bcol) break; 1937 if (rp[i] == bcol) { 1938 *v++ = ap[bs2*i+bs*cidx+ridx]; 1939 goto finished; 1940 } 1941 } 1942 *v++ = 0.0; 1943 finished:; 1944 } 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1951 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1952 { 1953 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1954 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1955 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1956 PetscErrorCode ierr; 1957 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1958 PetscBool roworiented=a->roworiented; 1959 const PetscScalar *value = v; 1960 MatScalar *ap,*aa = a->a,*bap; 1961 1962 PetscFunctionBegin; 1963 if (roworiented) { 1964 stepval = (n-1)*bs; 1965 } else { 1966 stepval = (m-1)*bs; 1967 } 1968 for (k=0; k<m; k++) { /* loop over added rows */ 1969 row = im[k]; 1970 if (row < 0) continue; 1971 #if defined(PETSC_USE_DEBUG) 1972 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1973 #endif 1974 rp = aj + ai[row]; 1975 ap = aa + bs2*ai[row]; 1976 rmax = imax[row]; 1977 nrow = ailen[row]; 1978 low = 0; 1979 high = nrow; 1980 for (l=0; l<n; l++) { /* loop over added columns */ 1981 if (in[l] < 0) continue; 1982 #if defined(PETSC_USE_DEBUG) 1983 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1984 #endif 1985 col = in[l]; 1986 if (roworiented) { 1987 value = v + (k*(stepval+bs) + l)*bs; 1988 } else { 1989 value = v + (l*(stepval+bs) + k)*bs; 1990 } 1991 if (col <= lastcol) low = 0; else high = nrow; 1992 lastcol = col; 1993 while (high-low > 7) { 1994 t = (low+high)/2; 1995 if (rp[t] > col) high = t; 1996 else low = t; 1997 } 1998 for (i=low; i<high; i++) { 1999 if (rp[i] > col) break; 2000 if (rp[i] == col) { 2001 bap = ap + bs2*i; 2002 if (roworiented) { 2003 if (is == ADD_VALUES) { 2004 for (ii=0; ii<bs; ii++,value+=stepval) { 2005 for (jj=ii; jj<bs2; jj+=bs) { 2006 bap[jj] += *value++; 2007 } 2008 } 2009 } else { 2010 for (ii=0; ii<bs; ii++,value+=stepval) { 2011 for (jj=ii; jj<bs2; jj+=bs) { 2012 bap[jj] = *value++; 2013 } 2014 } 2015 } 2016 } else { 2017 if (is == ADD_VALUES) { 2018 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2019 for (jj=0; jj<bs; jj++) { 2020 bap[jj] += value[jj]; 2021 } 2022 bap += bs; 2023 } 2024 } else { 2025 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2026 for (jj=0; jj<bs; jj++) { 2027 bap[jj] = value[jj]; 2028 } 2029 bap += bs; 2030 } 2031 } 2032 } 2033 goto noinsert2; 2034 } 2035 } 2036 if (nonew == 1) goto noinsert2; 2037 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2038 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2039 N = nrow++ - 1; high++; 2040 /* shift up all the later entries in this row */ 2041 for (ii=N; ii>=i; ii--) { 2042 rp[ii+1] = rp[ii]; 2043 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2044 } 2045 if (N >= i) { 2046 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2047 } 2048 rp[i] = col; 2049 bap = ap + bs2*i; 2050 if (roworiented) { 2051 for (ii=0; ii<bs; ii++,value+=stepval) { 2052 for (jj=ii; jj<bs2; jj+=bs) { 2053 bap[jj] = *value++; 2054 } 2055 } 2056 } else { 2057 for (ii=0; ii<bs; ii++,value+=stepval) { 2058 for (jj=0; jj<bs; jj++) { 2059 *bap++ = *value++; 2060 } 2061 } 2062 } 2063 noinsert2:; 2064 low = i; 2065 } 2066 ailen[row] = nrow; 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2073 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2074 { 2075 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2076 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2077 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2078 PetscErrorCode ierr; 2079 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2080 MatScalar *aa = a->a,*ap; 2081 PetscReal ratio=0.6; 2082 2083 PetscFunctionBegin; 2084 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2085 2086 if (m) rmax = ailen[0]; 2087 for (i=1; i<mbs; i++) { 2088 /* move each row back by the amount of empty slots (fshift) before it*/ 2089 fshift += imax[i-1] - ailen[i-1]; 2090 rmax = PetscMax(rmax,ailen[i]); 2091 if (fshift) { 2092 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2093 N = ailen[i]; 2094 for (j=0; j<N; j++) { 2095 ip[j-fshift] = ip[j]; 2096 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2097 } 2098 } 2099 ai[i] = ai[i-1] + ailen[i-1]; 2100 } 2101 if (mbs) { 2102 fshift += imax[mbs-1] - ailen[mbs-1]; 2103 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2104 } 2105 /* reset ilen and imax for each row */ 2106 for (i=0; i<mbs; i++) { 2107 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2108 } 2109 a->nz = ai[mbs]; 2110 2111 /* diagonals may have moved, so kill the diagonal pointers */ 2112 a->idiagvalid = PETSC_FALSE; 2113 if (fshift && a->diag) { 2114 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2115 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2116 a->diag = 0; 2117 } 2118 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2119 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2120 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2121 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2122 A->info.mallocs += a->reallocs; 2123 a->reallocs = 0; 2124 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2125 2126 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2127 A->same_nonzero = PETSC_TRUE; 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /* 2132 This function returns an array of flags which indicate the locations of contiguous 2133 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2134 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2135 Assume: sizes should be long enough to hold all the values. 2136 */ 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2139 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2140 { 2141 PetscInt i,j,k,row; 2142 PetscBool flg; 2143 2144 PetscFunctionBegin; 2145 for (i=0,j=0; i<n; j++) { 2146 row = idx[i]; 2147 if (row%bs!=0) { /* Not the begining of a block */ 2148 sizes[j] = 1; 2149 i++; 2150 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2151 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2152 i++; 2153 } else { /* Begining of the block, so check if the complete block exists */ 2154 flg = PETSC_TRUE; 2155 for (k=1; k<bs; k++) { 2156 if (row+k != idx[i+k]) { /* break in the block */ 2157 flg = PETSC_FALSE; 2158 break; 2159 } 2160 } 2161 if (flg) { /* No break in the bs */ 2162 sizes[j] = bs; 2163 i+= bs; 2164 } else { 2165 sizes[j] = 1; 2166 i++; 2167 } 2168 } 2169 } 2170 *bs_max = j; 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2176 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2177 { 2178 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2179 PetscErrorCode ierr; 2180 PetscInt i,j,k,count,*rows; 2181 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2182 PetscScalar zero = 0.0; 2183 MatScalar *aa; 2184 const PetscScalar *xx; 2185 PetscScalar *bb; 2186 2187 PetscFunctionBegin; 2188 /* fix right hand side if needed */ 2189 if (x && b) { 2190 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2191 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2192 for (i=0; i<is_n; i++) { 2193 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2194 } 2195 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2196 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2197 } 2198 2199 /* Make a copy of the IS and sort it */ 2200 /* allocate memory for rows,sizes */ 2201 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2202 2203 /* copy IS values to rows, and sort them */ 2204 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2205 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2206 2207 if (baij->keepnonzeropattern) { 2208 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2209 bs_max = is_n; 2210 A->same_nonzero = PETSC_TRUE; 2211 } else { 2212 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2213 A->same_nonzero = PETSC_FALSE; 2214 } 2215 2216 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2217 row = rows[j]; 2218 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2219 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2220 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2221 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2222 if (diag != (PetscScalar)0.0) { 2223 if (baij->ilen[row/bs] > 0) { 2224 baij->ilen[row/bs] = 1; 2225 baij->j[baij->i[row/bs]] = row/bs; 2226 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2227 } 2228 /* Now insert all the diagonal values for this bs */ 2229 for (k=0; k<bs; k++) { 2230 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2231 } 2232 } else { /* (diag == 0.0) */ 2233 baij->ilen[row/bs] = 0; 2234 } /* end (diag == 0.0) */ 2235 } else { /* (sizes[i] != bs) */ 2236 #if defined (PETSC_USE_DEBUG) 2237 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2238 #endif 2239 for (k=0; k<count; k++) { 2240 aa[0] = zero; 2241 aa += bs; 2242 } 2243 if (diag != (PetscScalar)0.0) { 2244 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2245 } 2246 } 2247 } 2248 2249 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2250 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2256 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2257 { 2258 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2259 PetscErrorCode ierr; 2260 PetscInt i,j,k,count; 2261 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2262 PetscScalar zero = 0.0; 2263 MatScalar *aa; 2264 const PetscScalar *xx; 2265 PetscScalar *bb; 2266 PetscBool *zeroed,vecs = PETSC_FALSE; 2267 2268 PetscFunctionBegin; 2269 /* fix right hand side if needed */ 2270 if (x && b) { 2271 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2272 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2273 vecs = PETSC_TRUE; 2274 } 2275 A->same_nonzero = PETSC_TRUE; 2276 2277 /* zero the columns */ 2278 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2279 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2280 for (i=0; i<is_n; i++) { 2281 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2282 zeroed[is_idx[i]] = PETSC_TRUE; 2283 } 2284 for (i=0; i<A->rmap->N; i++) { 2285 if (!zeroed[i]) { 2286 row = i/bs; 2287 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2288 for (k=0; k<bs; k++) { 2289 col = bs*baij->j[j] + k; 2290 if (zeroed[col]) { 2291 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2292 if (vecs) bb[i] -= aa[0]*xx[col]; 2293 aa[0] = 0.0; 2294 } 2295 } 2296 } 2297 } else if (vecs) bb[i] = diag*xx[i]; 2298 } 2299 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2300 if (vecs) { 2301 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2302 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2303 } 2304 2305 /* zero the rows */ 2306 for (i=0; i<is_n; i++) { 2307 row = is_idx[i]; 2308 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2309 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2310 for (k=0; k<count; k++) { 2311 aa[0] = zero; 2312 aa += bs; 2313 } 2314 if (diag != (PetscScalar)0.0) { 2315 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2316 } 2317 } 2318 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2324 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2325 { 2326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2327 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2328 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2329 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2330 PetscErrorCode ierr; 2331 PetscInt ridx,cidx,bs2=a->bs2; 2332 PetscBool roworiented=a->roworiented; 2333 MatScalar *ap,value,*aa=a->a,*bap; 2334 2335 PetscFunctionBegin; 2336 if (v) PetscValidScalarPointer(v,6); 2337 for (k=0; k<m; k++) { /* loop over added rows */ 2338 row = im[k]; 2339 brow = row/bs; 2340 if (row < 0) continue; 2341 #if defined(PETSC_USE_DEBUG) 2342 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2343 #endif 2344 rp = aj + ai[brow]; 2345 ap = aa + bs2*ai[brow]; 2346 rmax = imax[brow]; 2347 nrow = ailen[brow]; 2348 low = 0; 2349 high = nrow; 2350 for (l=0; l<n; l++) { /* loop over added columns */ 2351 if (in[l] < 0) continue; 2352 #if defined(PETSC_USE_DEBUG) 2353 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2354 #endif 2355 col = in[l]; bcol = col/bs; 2356 ridx = row % bs; cidx = col % bs; 2357 if (roworiented) { 2358 value = v[l + k*n]; 2359 } else { 2360 value = v[k + l*m]; 2361 } 2362 if (col <= lastcol) low = 0; else high = nrow; 2363 lastcol = col; 2364 while (high-low > 7) { 2365 t = (low+high)/2; 2366 if (rp[t] > bcol) high = t; 2367 else low = t; 2368 } 2369 for (i=low; i<high; i++) { 2370 if (rp[i] > bcol) break; 2371 if (rp[i] == bcol) { 2372 bap = ap + bs2*i + bs*cidx + ridx; 2373 if (is == ADD_VALUES) *bap += value; 2374 else *bap = value; 2375 goto noinsert1; 2376 } 2377 } 2378 if (nonew == 1) goto noinsert1; 2379 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2380 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2381 N = nrow++ - 1; high++; 2382 /* shift up all the later entries in this row */ 2383 for (ii=N; ii>=i; ii--) { 2384 rp[ii+1] = rp[ii]; 2385 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2386 } 2387 if (N>=i) { 2388 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2389 } 2390 rp[i] = bcol; 2391 ap[bs2*i + bs*cidx + ridx] = value; 2392 a->nz++; 2393 noinsert1:; 2394 low = i; 2395 } 2396 ailen[brow] = nrow; 2397 } 2398 A->same_nonzero = PETSC_FALSE; 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2404 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2405 { 2406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2407 Mat outA; 2408 PetscErrorCode ierr; 2409 PetscBool row_identity,col_identity; 2410 2411 PetscFunctionBegin; 2412 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2413 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2414 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2415 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2416 2417 outA = inA; 2418 inA->factortype = MAT_FACTOR_LU; 2419 2420 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2421 2422 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2423 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2424 a->row = row; 2425 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2426 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2427 a->col = col; 2428 2429 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2430 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2431 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2432 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2433 2434 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2435 if (!a->solve_work) { 2436 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2437 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2438 } 2439 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2440 2441 PetscFunctionReturn(0); 2442 } 2443 2444 EXTERN_C_BEGIN 2445 #undef __FUNCT__ 2446 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2447 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2448 { 2449 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2450 PetscInt i,nz,mbs; 2451 2452 PetscFunctionBegin; 2453 nz = baij->maxnz; 2454 mbs = baij->mbs; 2455 for (i=0; i<nz; i++) { 2456 baij->j[i] = indices[i]; 2457 } 2458 baij->nz = nz; 2459 for (i=0; i<mbs; i++) { 2460 baij->ilen[i] = baij->imax[i]; 2461 } 2462 PetscFunctionReturn(0); 2463 } 2464 EXTERN_C_END 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2468 /*@ 2469 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2470 in the matrix. 2471 2472 Input Parameters: 2473 + mat - the SeqBAIJ matrix 2474 - indices - the column indices 2475 2476 Level: advanced 2477 2478 Notes: 2479 This can be called if you have precomputed the nonzero structure of the 2480 matrix and want to provide it to the matrix object to improve the performance 2481 of the MatSetValues() operation. 2482 2483 You MUST have set the correct numbers of nonzeros per row in the call to 2484 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2485 2486 MUST be called before any calls to MatSetValues(); 2487 2488 @*/ 2489 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2490 { 2491 PetscErrorCode ierr; 2492 2493 PetscFunctionBegin; 2494 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2495 PetscValidPointer(indices,2); 2496 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2502 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2503 { 2504 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2505 PetscErrorCode ierr; 2506 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2507 PetscReal atmp; 2508 PetscScalar *x,zero = 0.0; 2509 MatScalar *aa; 2510 PetscInt ncols,brow,krow,kcol; 2511 2512 PetscFunctionBegin; 2513 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2514 bs = A->rmap->bs; 2515 aa = a->a; 2516 ai = a->i; 2517 aj = a->j; 2518 mbs = a->mbs; 2519 2520 ierr = VecSet(v,zero);CHKERRQ(ierr); 2521 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2522 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2523 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2524 for (i=0; i<mbs; i++) { 2525 ncols = ai[1] - ai[0]; ai++; 2526 brow = bs*i; 2527 for (j=0; j<ncols; j++) { 2528 for (kcol=0; kcol<bs; kcol++) { 2529 for (krow=0; krow<bs; krow++) { 2530 atmp = PetscAbsScalar(*aa);aa++; 2531 row = brow + krow; /* row index */ 2532 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2533 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2534 } 2535 } 2536 aj++; 2537 } 2538 } 2539 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 #undef __FUNCT__ 2544 #define __FUNCT__ "MatCopy_SeqBAIJ" 2545 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2546 { 2547 PetscErrorCode ierr; 2548 2549 PetscFunctionBegin; 2550 /* If the two matrices have the same copy implementation, use fast copy. */ 2551 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2553 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2554 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2555 2556 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2557 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2558 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2559 } else { 2560 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2561 } 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2567 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2568 { 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #undef __FUNCT__ 2577 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2578 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2579 { 2580 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2581 2582 PetscFunctionBegin; 2583 *array = a->a; 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2589 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2590 { 2591 PetscFunctionBegin; 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2597 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2598 { 2599 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2600 PetscErrorCode ierr; 2601 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2602 PetscBLASInt one=1; 2603 2604 PetscFunctionBegin; 2605 if (str == SAME_NONZERO_PATTERN) { 2606 PetscScalar alpha = a; 2607 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2608 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2609 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2610 if (y->xtoy && y->XtoY != X) { 2611 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2612 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2613 } 2614 if (!y->xtoy) { /* get xtoy */ 2615 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2616 y->XtoY = X; 2617 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2618 } 2619 for (i=0; i<x->nz; i++) { 2620 j = 0; 2621 while (j < bs2) { 2622 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2623 j++; 2624 } 2625 } 2626 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2627 } else { 2628 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2629 } 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2635 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2636 { 2637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2638 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2639 MatScalar *aa = a->a; 2640 2641 PetscFunctionBegin; 2642 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 #undef __FUNCT__ 2647 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2648 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2649 { 2650 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2651 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2652 MatScalar *aa = a->a; 2653 2654 PetscFunctionBegin; 2655 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2656 PetscFunctionReturn(0); 2657 } 2658 2659 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2663 /* 2664 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2665 */ 2666 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2667 { 2668 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2669 PetscErrorCode ierr; 2670 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2671 PetscInt nz = a->i[m],row,*jj,mr,col; 2672 2673 PetscFunctionBegin; 2674 *nn = n; 2675 if (!ia) PetscFunctionReturn(0); 2676 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2677 else { 2678 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2679 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2680 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2681 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2682 jj = a->j; 2683 for (i=0; i<nz; i++) { 2684 collengths[jj[i]]++; 2685 } 2686 cia[0] = oshift; 2687 for (i=0; i<n; i++) { 2688 cia[i+1] = cia[i] + collengths[i]; 2689 } 2690 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2691 jj = a->j; 2692 for (row=0; row<m; row++) { 2693 mr = a->i[row+1] - a->i[row]; 2694 for (i=0; i<mr; i++) { 2695 col = *jj++; 2696 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2697 } 2698 } 2699 ierr = PetscFree(collengths);CHKERRQ(ierr); 2700 *ia = cia; *ja = cja; 2701 } 2702 PetscFunctionReturn(0); 2703 } 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2707 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2708 { 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 if (!ia) PetscFunctionReturn(0); 2713 ierr = PetscFree(*ia);CHKERRQ(ierr); 2714 ierr = PetscFree(*ja);CHKERRQ(ierr); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2720 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2721 { 2722 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2723 PetscErrorCode ierr; 2724 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2725 PetscScalar dx,*y,*xx,*w3_array; 2726 PetscScalar *vscale_array; 2727 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2728 Vec w1=coloring->w1,w2=coloring->w2,w3; 2729 void *fctx = coloring->fctx; 2730 PetscBool flg = PETSC_FALSE; 2731 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2732 Vec x1_tmp; 2733 2734 PetscFunctionBegin; 2735 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2736 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2737 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2738 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2739 2740 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2741 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2742 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2743 if (flg) { 2744 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2745 } else { 2746 PetscBool assembled; 2747 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2748 if (assembled) { 2749 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2750 } 2751 } 2752 2753 x1_tmp = x1; 2754 if (!coloring->vscale) { 2755 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2756 } 2757 2758 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2759 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2760 } 2761 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2762 2763 /* Set w1 = F(x1) */ 2764 if (!coloring->fset) { 2765 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2766 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2767 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2768 } else { 2769 coloring->fset = PETSC_FALSE; 2770 } 2771 2772 if (!coloring->w3) { 2773 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2774 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2775 } 2776 w3 = coloring->w3; 2777 2778 CHKMEMQ; 2779 /* Compute all the local scale factors, including ghost points */ 2780 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2781 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2782 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2783 if (ctype == IS_COLORING_GHOSTED) { 2784 col_start = 0; col_end = N; 2785 } else if (ctype == IS_COLORING_GLOBAL) { 2786 xx = xx - start; 2787 vscale_array = vscale_array - start; 2788 col_start = start; col_end = N + start; 2789 } CHKMEMQ; 2790 for (col=col_start; col<col_end; col++) { 2791 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2792 if (coloring->htype[0] == 'w') { 2793 dx = 1.0 + unorm; 2794 } else { 2795 dx = xx[col]; 2796 } 2797 if (dx == (PetscScalar)0.0) dx = 1.0; 2798 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2799 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2800 dx *= epsilon; 2801 vscale_array[col] = (PetscScalar)1.0/dx; 2802 } CHKMEMQ; 2803 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2804 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2805 if (ctype == IS_COLORING_GLOBAL) { 2806 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2807 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2808 } 2809 CHKMEMQ; 2810 if (coloring->vscaleforrow) { 2811 vscaleforrow = coloring->vscaleforrow; 2812 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2813 2814 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2815 /* 2816 Loop over each color 2817 */ 2818 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2819 for (k=0; k<coloring->ncolors; k++) { 2820 coloring->currentcolor = k; 2821 for (i=0; i<bs; i++) { 2822 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2823 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2824 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2825 /* 2826 Loop over each column associated with color 2827 adding the perturbation to the vector w3. 2828 */ 2829 for (l=0; l<coloring->ncolumns[k]; l++) { 2830 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2831 if (coloring->htype[0] == 'w') { 2832 dx = 1.0 + unorm; 2833 } else { 2834 dx = xx[col]; 2835 } 2836 if (dx == (PetscScalar)0.0) dx = 1.0; 2837 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2838 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2839 dx *= epsilon; 2840 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2841 w3_array[col] += dx; 2842 } 2843 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2844 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2845 2846 /* 2847 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2848 w2 = F(x1 + dx) - F(x1) 2849 */ 2850 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2851 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2852 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2853 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2854 2855 /* 2856 Loop over rows of vector, putting results into Jacobian matrix 2857 */ 2858 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2859 for (l=0; l<coloring->nrows[k]; l++) { 2860 row = bs*coloring->rows[k][l]; /* local row index */ 2861 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2862 for (j=0; j<bs; j++) { 2863 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2864 srows[j] = row + start + j; 2865 } 2866 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2867 } 2868 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2869 } 2870 } /* endof for each color */ 2871 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2872 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2873 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2874 ierr = PetscFree(srows);CHKERRQ(ierr); 2875 2876 coloring->currentcolor = -1; 2877 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2878 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2880 PetscFunctionReturn(0); 2881 } 2882 2883 /* -------------------------------------------------------------------*/ 2884 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2885 MatGetRow_SeqBAIJ, 2886 MatRestoreRow_SeqBAIJ, 2887 MatMult_SeqBAIJ_N, 2888 /* 4*/ MatMultAdd_SeqBAIJ_N, 2889 MatMultTranspose_SeqBAIJ, 2890 MatMultTransposeAdd_SeqBAIJ, 2891 0, 2892 0, 2893 0, 2894 /*10*/ 0, 2895 MatLUFactor_SeqBAIJ, 2896 0, 2897 0, 2898 MatTranspose_SeqBAIJ, 2899 /*15*/ MatGetInfo_SeqBAIJ, 2900 MatEqual_SeqBAIJ, 2901 MatGetDiagonal_SeqBAIJ, 2902 MatDiagonalScale_SeqBAIJ, 2903 MatNorm_SeqBAIJ, 2904 /*20*/ 0, 2905 MatAssemblyEnd_SeqBAIJ, 2906 MatSetOption_SeqBAIJ, 2907 MatZeroEntries_SeqBAIJ, 2908 /*24*/ MatZeroRows_SeqBAIJ, 2909 0, 2910 0, 2911 0, 2912 0, 2913 /*29*/ MatSetUp_SeqBAIJ, 2914 0, 2915 0, 2916 0, 2917 0, 2918 /*34*/ MatDuplicate_SeqBAIJ, 2919 0, 2920 0, 2921 MatILUFactor_SeqBAIJ, 2922 0, 2923 /*39*/ MatAXPY_SeqBAIJ, 2924 MatGetSubMatrices_SeqBAIJ, 2925 MatIncreaseOverlap_SeqBAIJ, 2926 MatGetValues_SeqBAIJ, 2927 MatCopy_SeqBAIJ, 2928 /*44*/ 0, 2929 MatScale_SeqBAIJ, 2930 0, 2931 0, 2932 MatZeroRowsColumns_SeqBAIJ, 2933 /*49*/ 0, 2934 MatGetRowIJ_SeqBAIJ, 2935 MatRestoreRowIJ_SeqBAIJ, 2936 MatGetColumnIJ_SeqBAIJ, 2937 MatRestoreColumnIJ_SeqBAIJ, 2938 /*54*/ MatFDColoringCreate_SeqAIJ, 2939 0, 2940 0, 2941 0, 2942 MatSetValuesBlocked_SeqBAIJ, 2943 /*59*/ MatGetSubMatrix_SeqBAIJ, 2944 MatDestroy_SeqBAIJ, 2945 MatView_SeqBAIJ, 2946 0, 2947 0, 2948 /*64*/ 0, 2949 0, 2950 0, 2951 0, 2952 0, 2953 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2954 0, 2955 MatConvert_Basic, 2956 0, 2957 0, 2958 /*74*/ 0, 2959 MatFDColoringApply_BAIJ, 2960 0, 2961 0, 2962 0, 2963 /*79*/ 0, 2964 0, 2965 0, 2966 0, 2967 MatLoad_SeqBAIJ, 2968 /*84*/ 0, 2969 0, 2970 0, 2971 0, 2972 0, 2973 /*89*/ 0, 2974 0, 2975 0, 2976 0, 2977 0, 2978 /*94*/ 0, 2979 0, 2980 0, 2981 0, 2982 0, 2983 /*99*/0, 2984 0, 2985 0, 2986 0, 2987 0, 2988 /*104*/0, 2989 MatRealPart_SeqBAIJ, 2990 MatImaginaryPart_SeqBAIJ, 2991 0, 2992 0, 2993 /*109*/0, 2994 0, 2995 0, 2996 0, 2997 MatMissingDiagonal_SeqBAIJ, 2998 /*114*/0, 2999 0, 3000 0, 3001 0, 3002 0, 3003 /*119*/0, 3004 0, 3005 MatMultHermitianTranspose_SeqBAIJ, 3006 MatMultHermitianTransposeAdd_SeqBAIJ, 3007 0, 3008 /*124*/0, 3009 0, 3010 MatInvertBlockDiagonal_SeqBAIJ 3011 }; 3012 3013 EXTERN_C_BEGIN 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3016 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3017 { 3018 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3019 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3020 PetscErrorCode ierr; 3021 3022 PetscFunctionBegin; 3023 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3024 3025 /* allocate space for values if not already there */ 3026 if (!aij->saved_values) { 3027 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3028 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3029 } 3030 3031 /* copy values over */ 3032 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3033 PetscFunctionReturn(0); 3034 } 3035 EXTERN_C_END 3036 3037 EXTERN_C_BEGIN 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3040 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3041 { 3042 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3043 PetscErrorCode ierr; 3044 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3045 3046 PetscFunctionBegin; 3047 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3048 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3049 3050 /* copy values over */ 3051 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 EXTERN_C_END 3055 3056 EXTERN_C_BEGIN 3057 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3058 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3059 EXTERN_C_END 3060 3061 EXTERN_C_BEGIN 3062 #undef __FUNCT__ 3063 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3064 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3065 { 3066 Mat_SeqBAIJ *b; 3067 PetscErrorCode ierr; 3068 PetscInt i,mbs,nbs,bs2; 3069 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3070 3071 PetscFunctionBegin; 3072 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3073 if (nz == MAT_SKIP_ALLOCATION) { 3074 skipallocation = PETSC_TRUE; 3075 nz = 0; 3076 } 3077 3078 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3079 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3080 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3081 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3082 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3083 3084 B->preallocated = PETSC_TRUE; 3085 3086 mbs = B->rmap->n/bs; 3087 nbs = B->cmap->n/bs; 3088 bs2 = bs*bs; 3089 3090 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3091 3092 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3093 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3094 if (nnz) { 3095 for (i=0; i<mbs; i++) { 3096 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3097 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3098 } 3099 } 3100 3101 b = (Mat_SeqBAIJ*)B->data; 3102 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3103 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3104 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3105 3106 if (!flg) { 3107 switch (bs) { 3108 case 1: 3109 B->ops->mult = MatMult_SeqBAIJ_1; 3110 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3111 B->ops->sor = MatSOR_SeqBAIJ_1; 3112 break; 3113 case 2: 3114 B->ops->mult = MatMult_SeqBAIJ_2; 3115 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3116 B->ops->sor = MatSOR_SeqBAIJ_2; 3117 break; 3118 case 3: 3119 B->ops->mult = MatMult_SeqBAIJ_3; 3120 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3121 B->ops->sor = MatSOR_SeqBAIJ_3; 3122 break; 3123 case 4: 3124 B->ops->mult = MatMult_SeqBAIJ_4; 3125 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3126 B->ops->sor = MatSOR_SeqBAIJ_4; 3127 break; 3128 case 5: 3129 B->ops->mult = MatMult_SeqBAIJ_5; 3130 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3131 B->ops->sor = MatSOR_SeqBAIJ_5; 3132 break; 3133 case 6: 3134 B->ops->mult = MatMult_SeqBAIJ_6; 3135 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3136 B->ops->sor = MatSOR_SeqBAIJ_6; 3137 break; 3138 case 7: 3139 B->ops->mult = MatMult_SeqBAIJ_7; 3140 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3141 B->ops->sor = MatSOR_SeqBAIJ_7; 3142 break; 3143 case 15: 3144 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3145 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3146 B->ops->sor = MatSOR_SeqBAIJ_N; 3147 break; 3148 default: 3149 B->ops->mult = MatMult_SeqBAIJ_N; 3150 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3151 B->ops->sor = MatSOR_SeqBAIJ_N; 3152 break; 3153 } 3154 } 3155 b->mbs = mbs; 3156 b->nbs = nbs; 3157 if (!skipallocation) { 3158 if (!b->imax) { 3159 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3160 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3161 b->free_imax_ilen = PETSC_TRUE; 3162 } 3163 /* b->ilen will count nonzeros in each block row so far. */ 3164 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3165 if (!nnz) { 3166 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3167 else if (nz < 0) nz = 1; 3168 for (i=0; i<mbs; i++) b->imax[i] = nz; 3169 nz = nz*mbs; 3170 } else { 3171 nz = 0; 3172 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3173 } 3174 3175 /* allocate the matrix space */ 3176 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3177 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3178 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3179 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3180 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3181 b->singlemalloc = PETSC_TRUE; 3182 b->i[0] = 0; 3183 for (i=1; i<mbs+1; i++) { 3184 b->i[i] = b->i[i-1] + b->imax[i-1]; 3185 } 3186 b->free_a = PETSC_TRUE; 3187 b->free_ij = PETSC_TRUE; 3188 } else { 3189 b->free_a = PETSC_FALSE; 3190 b->free_ij = PETSC_FALSE; 3191 } 3192 3193 b->bs2 = bs2; 3194 b->mbs = mbs; 3195 b->nz = 0; 3196 b->maxnz = nz; 3197 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3198 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3199 PetscFunctionReturn(0); 3200 } 3201 EXTERN_C_END 3202 3203 EXTERN_C_BEGIN 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3206 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3207 { 3208 PetscInt i,m,nz,nz_max=0,*nnz; 3209 PetscScalar *values=0; 3210 PetscErrorCode ierr; 3211 3212 PetscFunctionBegin; 3213 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3214 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3215 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3216 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3217 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3218 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3219 m = B->rmap->n/bs; 3220 3221 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3222 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3223 for (i=0; i<m; i++) { 3224 nz = ii[i+1]- ii[i]; 3225 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3226 nz_max = PetscMax(nz_max, nz); 3227 nnz[i] = nz; 3228 } 3229 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3230 ierr = PetscFree(nnz);CHKERRQ(ierr); 3231 3232 values = (PetscScalar*)V; 3233 if (!values) { 3234 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3235 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3236 } 3237 for (i=0; i<m; i++) { 3238 PetscInt ncols = ii[i+1] - ii[i]; 3239 const PetscInt *icols = jj + ii[i]; 3240 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3241 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3242 } 3243 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3244 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3245 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3246 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3247 PetscFunctionReturn(0); 3248 } 3249 EXTERN_C_END 3250 3251 3252 EXTERN_C_BEGIN 3253 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3254 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3255 #if defined(PETSC_HAVE_MUMPS) 3256 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3257 #endif 3258 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3259 EXTERN_C_END 3260 3261 /*MC 3262 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3263 block sparse compressed row format. 3264 3265 Options Database Keys: 3266 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3267 3268 Level: beginner 3269 3270 .seealso: MatCreateSeqBAIJ() 3271 M*/ 3272 3273 EXTERN_C_BEGIN 3274 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3275 EXTERN_C_END 3276 3277 EXTERN_C_BEGIN 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatCreate_SeqBAIJ" 3280 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3281 { 3282 PetscErrorCode ierr; 3283 PetscMPIInt size; 3284 Mat_SeqBAIJ *b; 3285 3286 PetscFunctionBegin; 3287 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3288 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3289 3290 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3291 B->data = (void*)b; 3292 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3293 b->row = 0; 3294 b->col = 0; 3295 b->icol = 0; 3296 b->reallocs = 0; 3297 b->saved_values = 0; 3298 3299 b->roworiented = PETSC_TRUE; 3300 b->nonew = 0; 3301 b->diag = 0; 3302 b->solve_work = 0; 3303 b->mult_work = 0; 3304 B->spptr = 0; 3305 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3306 b->keepnonzeropattern = PETSC_FALSE; 3307 b->xtoy = 0; 3308 b->XtoY = 0; 3309 B->same_nonzero = PETSC_FALSE; 3310 3311 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3312 "MatGetFactorAvailable_seqbaij_petsc", 3313 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3314 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3315 "MatGetFactor_seqbaij_petsc", 3316 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3317 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3318 "MatGetFactor_seqbaij_bstrm", 3319 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3320 #if defined(PETSC_HAVE_MUMPS) 3321 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3322 #endif 3323 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3324 "MatInvertBlockDiagonal_SeqBAIJ", 3325 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3326 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3327 "MatStoreValues_SeqBAIJ", 3328 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3329 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3330 "MatRetrieveValues_SeqBAIJ", 3331 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3332 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3333 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3334 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3335 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3336 "MatConvert_SeqBAIJ_SeqAIJ", 3337 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3338 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3339 "MatConvert_SeqBAIJ_SeqSBAIJ", 3340 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3342 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3343 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3344 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3345 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3346 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3347 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3348 "MatConvert_SeqBAIJ_SeqBSTRM", 3349 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3351 "MatIsTranspose_SeqBAIJ", 3352 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3353 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3354 PetscFunctionReturn(0); 3355 } 3356 EXTERN_C_END 3357 3358 #undef __FUNCT__ 3359 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3360 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3361 { 3362 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3363 PetscErrorCode ierr; 3364 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3365 3366 PetscFunctionBegin; 3367 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3368 3369 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3370 c->imax = a->imax; 3371 c->ilen = a->ilen; 3372 c->free_imax_ilen = PETSC_FALSE; 3373 } else { 3374 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3375 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3376 for (i=0; i<mbs; i++) { 3377 c->imax[i] = a->imax[i]; 3378 c->ilen[i] = a->ilen[i]; 3379 } 3380 c->free_imax_ilen = PETSC_TRUE; 3381 } 3382 3383 /* allocate the matrix space */ 3384 if (mallocmatspace) { 3385 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3386 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3387 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3388 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3389 c->i = a->i; 3390 c->j = a->j; 3391 c->singlemalloc = PETSC_FALSE; 3392 c->free_a = PETSC_TRUE; 3393 c->free_ij = PETSC_FALSE; 3394 c->parent = A; 3395 C->preallocated = PETSC_TRUE; 3396 C->assembled = PETSC_TRUE; 3397 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3398 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3399 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3400 } else { 3401 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3402 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3403 c->singlemalloc = PETSC_TRUE; 3404 c->free_a = PETSC_TRUE; 3405 c->free_ij = PETSC_TRUE; 3406 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3407 if (mbs > 0) { 3408 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3409 if (cpvalues == MAT_COPY_VALUES) { 3410 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3411 } else { 3412 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3413 } 3414 } 3415 C->preallocated = PETSC_TRUE; 3416 C->assembled = PETSC_TRUE; 3417 } 3418 } 3419 3420 c->roworiented = a->roworiented; 3421 c->nonew = a->nonew; 3422 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3423 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3424 c->bs2 = a->bs2; 3425 c->mbs = a->mbs; 3426 c->nbs = a->nbs; 3427 3428 if (a->diag) { 3429 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3430 c->diag = a->diag; 3431 c->free_diag = PETSC_FALSE; 3432 } else { 3433 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3434 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3435 for (i=0; i<mbs; i++) { 3436 c->diag[i] = a->diag[i]; 3437 } 3438 c->free_diag = PETSC_TRUE; 3439 } 3440 } else c->diag = 0; 3441 c->nz = a->nz; 3442 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3443 c->solve_work = 0; 3444 c->mult_work = 0; 3445 3446 c->compressedrow.use = a->compressedrow.use; 3447 c->compressedrow.nrows = a->compressedrow.nrows; 3448 c->compressedrow.check = a->compressedrow.check; 3449 if (a->compressedrow.use) { 3450 i = a->compressedrow.nrows; 3451 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3452 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3453 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3454 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3455 } else { 3456 c->compressedrow.use = PETSC_FALSE; 3457 c->compressedrow.i = PETSC_NULL; 3458 c->compressedrow.rindex = PETSC_NULL; 3459 } 3460 C->same_nonzero = A->same_nonzero; 3461 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3462 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 3465 3466 #undef __FUNCT__ 3467 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3468 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3469 { 3470 PetscErrorCode ierr; 3471 3472 PetscFunctionBegin; 3473 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3474 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3475 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3476 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3477 PetscFunctionReturn(0); 3478 } 3479 3480 #undef __FUNCT__ 3481 #define __FUNCT__ "MatLoad_SeqBAIJ" 3482 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3483 { 3484 Mat_SeqBAIJ *a; 3485 PetscErrorCode ierr; 3486 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3487 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3488 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3489 PetscInt *masked,nmask,tmp,bs2,ishift; 3490 PetscMPIInt size; 3491 int fd; 3492 PetscScalar *aa; 3493 MPI_Comm comm = ((PetscObject)viewer)->comm; 3494 3495 PetscFunctionBegin; 3496 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3497 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3498 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3499 bs2 = bs*bs; 3500 3501 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3502 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3503 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3504 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3505 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3506 M = header[1]; N = header[2]; nz = header[3]; 3507 3508 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3509 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3510 3511 /* 3512 This code adds extra rows to make sure the number of rows is 3513 divisible by the blocksize 3514 */ 3515 mbs = M/bs; 3516 extra_rows = bs - M + bs*(mbs); 3517 if (extra_rows == bs) extra_rows = 0; 3518 else mbs++; 3519 if (extra_rows) { 3520 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3521 } 3522 3523 /* Set global sizes if not already set */ 3524 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3525 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3526 } else { /* Check if the matrix global sizes are correct */ 3527 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3528 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3529 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3530 } 3531 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3532 } 3533 3534 /* read in row lengths */ 3535 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3536 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3537 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3538 3539 /* read in column indices */ 3540 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3541 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3542 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3543 3544 /* loop over row lengths determining block row lengths */ 3545 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3546 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3547 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3548 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3549 rowcount = 0; 3550 nzcount = 0; 3551 for (i=0; i<mbs; i++) { 3552 nmask = 0; 3553 for (j=0; j<bs; j++) { 3554 kmax = rowlengths[rowcount]; 3555 for (k=0; k<kmax; k++) { 3556 tmp = jj[nzcount++]/bs; 3557 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3558 } 3559 rowcount++; 3560 } 3561 browlengths[i] += nmask; 3562 /* zero out the mask elements we set */ 3563 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3564 } 3565 3566 /* Do preallocation */ 3567 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3568 a = (Mat_SeqBAIJ*)newmat->data; 3569 3570 /* set matrix "i" values */ 3571 a->i[0] = 0; 3572 for (i=1; i<= mbs; i++) { 3573 a->i[i] = a->i[i-1] + browlengths[i-1]; 3574 a->ilen[i-1] = browlengths[i-1]; 3575 } 3576 a->nz = 0; 3577 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3578 3579 /* read in nonzero values */ 3580 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3581 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3582 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3583 3584 /* set "a" and "j" values into matrix */ 3585 nzcount = 0; jcount = 0; 3586 for (i=0; i<mbs; i++) { 3587 nzcountb = nzcount; 3588 nmask = 0; 3589 for (j=0; j<bs; j++) { 3590 kmax = rowlengths[i*bs+j]; 3591 for (k=0; k<kmax; k++) { 3592 tmp = jj[nzcount++]/bs; 3593 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3594 } 3595 } 3596 /* sort the masked values */ 3597 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3598 3599 /* set "j" values into matrix */ 3600 maskcount = 1; 3601 for (j=0; j<nmask; j++) { 3602 a->j[jcount++] = masked[j]; 3603 mask[masked[j]] = maskcount++; 3604 } 3605 /* set "a" values into matrix */ 3606 ishift = bs2*a->i[i]; 3607 for (j=0; j<bs; j++) { 3608 kmax = rowlengths[i*bs+j]; 3609 for (k=0; k<kmax; k++) { 3610 tmp = jj[nzcountb]/bs ; 3611 block = mask[tmp] - 1; 3612 point = jj[nzcountb] - bs*tmp; 3613 idx = ishift + bs2*block + j + bs*point; 3614 a->a[idx] = (MatScalar)aa[nzcountb++]; 3615 } 3616 } 3617 /* zero out the mask elements we set */ 3618 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3619 } 3620 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3621 3622 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3623 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3624 ierr = PetscFree(aa);CHKERRQ(ierr); 3625 ierr = PetscFree(jj);CHKERRQ(ierr); 3626 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3627 3628 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3629 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3630 PetscFunctionReturn(0); 3631 } 3632 3633 #undef __FUNCT__ 3634 #define __FUNCT__ "MatCreateSeqBAIJ" 3635 /*@C 3636 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3637 compressed row) format. For good matrix assembly performance the 3638 user should preallocate the matrix storage by setting the parameter nz 3639 (or the array nnz). By setting these parameters accurately, performance 3640 during matrix assembly can be increased by more than a factor of 50. 3641 3642 Collective on MPI_Comm 3643 3644 Input Parameters: 3645 + comm - MPI communicator, set to PETSC_COMM_SELF 3646 . bs - size of block 3647 . m - number of rows 3648 . n - number of columns 3649 . nz - number of nonzero blocks per block row (same for all rows) 3650 - nnz - array containing the number of nonzero blocks in the various block rows 3651 (possibly different for each block row) or PETSC_NULL 3652 3653 Output Parameter: 3654 . A - the matrix 3655 3656 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3657 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3658 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3659 3660 Options Database Keys: 3661 . -mat_no_unroll - uses code that does not unroll the loops in the 3662 block calculations (much slower) 3663 . -mat_block_size - size of the blocks to use 3664 3665 Level: intermediate 3666 3667 Notes: 3668 The number of rows and columns must be divisible by blocksize. 3669 3670 If the nnz parameter is given then the nz parameter is ignored 3671 3672 A nonzero block is any block that as 1 or more nonzeros in it 3673 3674 The block AIJ format is fully compatible with standard Fortran 77 3675 storage. That is, the stored row and column indices can begin at 3676 either one (as in Fortran) or zero. See the users' manual for details. 3677 3678 Specify the preallocated storage with either nz or nnz (not both). 3679 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3680 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3681 matrices. 3682 3683 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3684 @*/ 3685 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3686 { 3687 PetscErrorCode ierr; 3688 3689 PetscFunctionBegin; 3690 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3691 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3692 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3693 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3694 PetscFunctionReturn(0); 3695 } 3696 3697 #undef __FUNCT__ 3698 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3699 /*@C 3700 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3701 per row in the matrix. For good matrix assembly performance the 3702 user should preallocate the matrix storage by setting the parameter nz 3703 (or the array nnz). By setting these parameters accurately, performance 3704 during matrix assembly can be increased by more than a factor of 50. 3705 3706 Collective on MPI_Comm 3707 3708 Input Parameters: 3709 + A - the matrix 3710 . bs - size of block 3711 . nz - number of block nonzeros per block row (same for all rows) 3712 - nnz - array containing the number of block nonzeros in the various block rows 3713 (possibly different for each block row) or PETSC_NULL 3714 3715 Options Database Keys: 3716 . -mat_no_unroll - uses code that does not unroll the loops in the 3717 block calculations (much slower) 3718 . -mat_block_size - size of the blocks to use 3719 3720 Level: intermediate 3721 3722 Notes: 3723 If the nnz parameter is given then the nz parameter is ignored 3724 3725 You can call MatGetInfo() to get information on how effective the preallocation was; 3726 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3727 You can also run with the option -info and look for messages with the string 3728 malloc in them to see if additional memory allocation was needed. 3729 3730 The block AIJ format is fully compatible with standard Fortran 77 3731 storage. That is, the stored row and column indices can begin at 3732 either one (as in Fortran) or zero. See the users' manual for details. 3733 3734 Specify the preallocated storage with either nz or nnz (not both). 3735 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3736 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3737 3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3739 @*/ 3740 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3741 { 3742 PetscErrorCode ierr; 3743 3744 PetscFunctionBegin; 3745 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3746 PetscValidType(B,1); 3747 PetscValidLogicalCollectiveInt(B,bs,2); 3748 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3749 PetscFunctionReturn(0); 3750 } 3751 3752 #undef __FUNCT__ 3753 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3754 /*@C 3755 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3756 (the default sequential PETSc format). 3757 3758 Collective on MPI_Comm 3759 3760 Input Parameters: 3761 + A - the matrix 3762 . i - the indices into j for the start of each local row (starts with zero) 3763 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3764 - v - optional values in the matrix 3765 3766 Level: developer 3767 3768 .keywords: matrix, aij, compressed row, sparse 3769 3770 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3771 @*/ 3772 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3773 { 3774 PetscErrorCode ierr; 3775 3776 PetscFunctionBegin; 3777 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3778 PetscValidType(B,1); 3779 PetscValidLogicalCollectiveInt(B,bs,2); 3780 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3781 PetscFunctionReturn(0); 3782 } 3783 3784 3785 #undef __FUNCT__ 3786 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3787 /*@ 3788 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3789 3790 Collective on MPI_Comm 3791 3792 Input Parameters: 3793 + comm - must be an MPI communicator of size 1 3794 . bs - size of block 3795 . m - number of rows 3796 . n - number of columns 3797 . i - row indices 3798 . j - column indices 3799 - a - matrix values 3800 3801 Output Parameter: 3802 . mat - the matrix 3803 3804 Level: advanced 3805 3806 Notes: 3807 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3808 once the matrix is destroyed 3809 3810 You cannot set new nonzero locations into this matrix, that will generate an error. 3811 3812 The i and j indices are 0 based 3813 3814 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3815 3816 3817 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3818 3819 @*/ 3820 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3821 { 3822 PetscErrorCode ierr; 3823 PetscInt ii; 3824 Mat_SeqBAIJ *baij; 3825 3826 PetscFunctionBegin; 3827 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3828 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3829 3830 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3831 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3832 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3833 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3834 baij = (Mat_SeqBAIJ*)(*mat)->data; 3835 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3836 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3837 3838 baij->i = i; 3839 baij->j = j; 3840 baij->a = a; 3841 baij->singlemalloc = PETSC_FALSE; 3842 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3843 baij->free_a = PETSC_FALSE; 3844 baij->free_ij = PETSC_FALSE; 3845 3846 for (ii=0; ii<m; ii++) { 3847 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3848 #if defined(PETSC_USE_DEBUG) 3849 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3850 #endif 3851 } 3852 #if defined(PETSC_USE_DEBUG) 3853 for (ii=0; ii<baij->i[m]; ii++) { 3854 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3855 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3856 } 3857 #endif 3858 3859 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3860 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3861 PetscFunctionReturn(0); 3862 } 3863