1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 154 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 155 156 if (flag & SOR_ZERO_INITIAL_GUESS) { 157 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 158 x[0] = b[0]*idiag[0]; 159 i2 = 1; 160 idiag += 1; 161 for (i=1; i<m; i++) { 162 v = aa + ai[i]; 163 vi = aj + ai[i]; 164 nz = diag[i] - ai[i]; 165 s1 = b[i2]; 166 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 167 x[i2] = idiag[0]*s1; 168 idiag += 1; 169 i2 += 1; 170 } 171 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 172 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 173 } 174 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 175 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 176 i2 = 0; 177 mdiag = a->idiag+a->mbs; 178 for (i=0; i<m; i++) { 179 x1 = x[i2]; 180 x[i2] = mdiag[0]*x1; 181 mdiag += 1; 182 i2 += 1; 183 } 184 ierr = PetscLogFlops(m);CHKERRQ(ierr); 185 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 186 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 187 } 188 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 189 idiag = a->idiag+a->mbs - 1; 190 i2 = m - 1; 191 x1 = x[i2]; 192 x[i2] = idiag[0]*x1; 193 idiag -= 1; 194 i2 -= 1; 195 for (i=m-2; i>=0; i--) { 196 v = aa + (diag[i]+1); 197 vi = aj + diag[i] + 1; 198 nz = ai[i+1] - diag[i] - 1; 199 s1 = x[i2]; 200 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 201 x[i2] = idiag[0]*s1; 202 idiag -= 1; 203 i2 -= 1; 204 } 205 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 206 } 207 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 208 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 209 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 215 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 PetscScalar *x,x1,x2,s1,s2; 219 const PetscScalar *b; 220 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 221 PetscErrorCode ierr; 222 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 223 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 224 225 PetscFunctionBegin; 226 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 227 its = its*lits; 228 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 229 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 230 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 231 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 232 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 233 234 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 235 236 if (!m) PetscFunctionReturn(0); 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 244 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 246 i2 = 2; 247 idiag += 4; 248 for (i=1; i<m; i++) { 249 v = aa + 4*ai[i]; 250 vi = aj + ai[i]; 251 nz = diag[i] - ai[i]; 252 s1 = b[i2]; s2 = b[i2+1]; 253 for (j=0; j<nz; j++) { 254 idx = 2*vi[j]; 255 it = 4*j; 256 x1 = x[idx]; x2 = x[1+idx]; 257 s1 -= v[it]*x1 + v[it+2]*x2; 258 s2 -= v[it+1]*x1 + v[it+3]*x2; 259 } 260 x[i2] = idiag[0]*s1 + idiag[2]*s2; 261 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 262 idiag += 4; 263 i2 += 2; 264 } 265 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 266 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 267 } 268 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 269 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 270 i2 = 0; 271 mdiag = a->idiag+4*a->mbs; 272 for (i=0; i<m; i++) { 273 x1 = x[i2]; x2 = x[i2+1]; 274 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 275 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 276 mdiag += 4; 277 i2 += 2; 278 } 279 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 280 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 281 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 282 } 283 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 284 idiag = a->idiag+4*a->mbs - 4; 285 i2 = 2*m - 2; 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = idiag[0]*x1 + idiag[2]*x2; 288 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 289 idiag -= 4; 290 i2 -= 2; 291 for (i=m-2; i>=0; i--) { 292 v = aa + 4*(diag[i]+1); 293 vi = aj + diag[i] + 1; 294 nz = ai[i+1] - diag[i] - 1; 295 s1 = x[i2]; s2 = x[i2+1]; 296 for (j=0; j<nz; j++) { 297 idx = 2*vi[j]; 298 it = 4*j; 299 x1 = x[idx]; x2 = x[1+idx]; 300 s1 -= v[it]*x1 + v[it+2]*x2; 301 s2 -= v[it+1]*x1 + v[it+3]*x2; 302 } 303 x[i2] = idiag[0]*s1 + idiag[2]*s2; 304 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 305 idiag -= 4; 306 i2 -= 2; 307 } 308 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 309 } 310 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 311 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 312 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNCT__ 317 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 318 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 319 { 320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 321 PetscScalar *x,x1,x2,x3,s1,s2,s3; 322 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 323 const PetscScalar *b; 324 PetscErrorCode ierr; 325 PetscInt m = a->mbs,i,i2,nz,idx; 326 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 327 328 PetscFunctionBegin; 329 its = its*lits; 330 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 331 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 332 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 333 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 334 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 335 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 336 337 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 338 339 if (!m) PetscFunctionReturn(0); 340 diag = a->diag; 341 idiag = a->idiag; 342 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 343 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 344 345 if (flag & SOR_ZERO_INITIAL_GUESS) { 346 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 347 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 348 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 349 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 350 i2 = 3; 351 idiag += 9; 352 for (i=1; i<m; i++) { 353 v = aa + 9*ai[i]; 354 vi = aj + ai[i]; 355 nz = diag[i] - ai[i]; 356 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 357 while (nz--) { 358 idx = 3*(*vi++); 359 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 360 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 361 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 362 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 363 v += 9; 364 } 365 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 366 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 367 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 368 idiag += 9; 369 i2 += 3; 370 } 371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 373 } 374 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 375 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 376 i2 = 0; 377 mdiag = a->idiag+9*a->mbs; 378 for (i=0; i<m; i++) { 379 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 380 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 381 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 382 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 383 mdiag += 9; 384 i2 += 3; 385 } 386 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 387 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 388 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 389 } 390 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 391 idiag = a->idiag+9*a->mbs - 9; 392 i2 = 3*m - 3; 393 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 394 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 395 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 396 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 397 idiag -= 9; 398 i2 -= 3; 399 for (i=m-2; i>=0; i--) { 400 v = aa + 9*(diag[i]+1); 401 vi = aj + diag[i] + 1; 402 nz = ai[i+1] - diag[i] - 1; 403 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 404 while (nz--) { 405 idx = 3*(*vi++); 406 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 407 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 408 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 409 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 410 v += 9; 411 } 412 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 413 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 414 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 415 idiag -= 9; 416 i2 -= 3; 417 } 418 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 419 } 420 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 421 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 428 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 429 { 430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 431 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 432 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 433 const PetscScalar *b; 434 PetscErrorCode ierr; 435 PetscInt m = a->mbs,i,i2,nz,idx; 436 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 437 438 PetscFunctionBegin; 439 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 440 its = its*lits; 441 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 442 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 443 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 444 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 445 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 446 447 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 448 449 if (!m) PetscFunctionReturn(0); 450 diag = a->diag; 451 idiag = a->idiag; 452 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 453 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 454 455 if (flag & SOR_ZERO_INITIAL_GUESS) { 456 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 457 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 458 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 459 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 460 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 461 i2 = 4; 462 idiag += 16; 463 for (i=1; i<m; i++) { 464 v = aa + 16*ai[i]; 465 vi = aj + ai[i]; 466 nz = diag[i] - ai[i]; 467 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 468 while (nz--) { 469 idx = 4*(*vi++); 470 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 471 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 472 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 473 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 474 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 475 v += 16; 476 } 477 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 478 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 479 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 480 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 481 idiag += 16; 482 i2 += 4; 483 } 484 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 485 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 486 } 487 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 488 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 489 i2 = 0; 490 mdiag = a->idiag+16*a->mbs; 491 for (i=0; i<m; i++) { 492 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 493 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 494 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 495 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 496 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 497 mdiag += 16; 498 i2 += 4; 499 } 500 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 501 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 502 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 503 } 504 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 505 idiag = a->idiag+16*a->mbs - 16; 506 i2 = 4*m - 4; 507 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 508 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 509 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 510 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 511 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 512 idiag -= 16; 513 i2 -= 4; 514 for (i=m-2; i>=0; i--) { 515 v = aa + 16*(diag[i]+1); 516 vi = aj + diag[i] + 1; 517 nz = ai[i+1] - diag[i] - 1; 518 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 519 while (nz--) { 520 idx = 4*(*vi++); 521 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 522 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 523 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 524 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 525 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 526 v += 16; 527 } 528 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 529 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 530 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 531 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 532 idiag -= 16; 533 i2 -= 4; 534 } 535 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 536 } 537 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 538 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 539 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 540 PetscFunctionReturn(0); 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 545 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 546 { 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 549 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 550 const PetscScalar *b; 551 PetscErrorCode ierr; 552 PetscInt m = a->mbs,i,i2,nz,idx; 553 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 554 555 PetscFunctionBegin; 556 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 557 its = its*lits; 558 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 559 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 560 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 561 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 562 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 563 564 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 565 566 if (!m) PetscFunctionReturn(0); 567 diag = a->diag; 568 idiag = a->idiag; 569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 570 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 571 572 if (flag & SOR_ZERO_INITIAL_GUESS) { 573 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 574 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 575 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 576 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 577 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 578 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 579 i2 = 5; 580 idiag += 25; 581 for (i=1; i<m; i++) { 582 v = aa + 25*ai[i]; 583 vi = aj + ai[i]; 584 nz = diag[i] - ai[i]; 585 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 586 while (nz--) { 587 idx = 5*(*vi++); 588 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 589 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 590 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 591 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 592 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 593 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 594 v += 25; 595 } 596 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 597 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 598 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 599 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 600 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 601 idiag += 25; 602 i2 += 5; 603 } 604 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 605 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 606 } 607 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 608 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 609 i2 = 0; 610 mdiag = a->idiag+25*a->mbs; 611 for (i=0; i<m; i++) { 612 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 613 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 614 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 615 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 616 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 617 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 618 mdiag += 25; 619 i2 += 5; 620 } 621 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 622 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 623 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 624 } 625 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 626 idiag = a->idiag+25*a->mbs - 25; 627 i2 = 5*m - 5; 628 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 629 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 630 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 631 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 632 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 633 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 634 idiag -= 25; 635 i2 -= 5; 636 for (i=m-2; i>=0; i--) { 637 v = aa + 25*(diag[i]+1); 638 vi = aj + diag[i] + 1; 639 nz = ai[i+1] - diag[i] - 1; 640 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 641 while (nz--) { 642 idx = 5*(*vi++); 643 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 644 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 645 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 646 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 647 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 648 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 649 v += 25; 650 } 651 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 652 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 653 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 654 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 655 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 656 idiag -= 25; 657 i2 -= 5; 658 } 659 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 660 } 661 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 662 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 669 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 670 { 671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 672 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 673 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 674 const PetscScalar *b; 675 PetscErrorCode ierr; 676 PetscInt m = a->mbs,i,i2,nz,idx; 677 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 678 679 PetscFunctionBegin; 680 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 681 its = its*lits; 682 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 683 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 684 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 685 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 686 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 687 688 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 689 690 if (!m) PetscFunctionReturn(0); 691 diag = a->diag; 692 idiag = a->idiag; 693 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 694 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 695 696 if (flag & SOR_ZERO_INITIAL_GUESS) { 697 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 698 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 699 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 700 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 701 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 702 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 703 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 704 i2 = 6; 705 idiag += 36; 706 for (i=1; i<m; i++) { 707 v = aa + 36*ai[i]; 708 vi = aj + ai[i]; 709 nz = diag[i] - ai[i]; 710 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 711 while (nz--) { 712 idx = 6*(*vi++); 713 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 714 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 715 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 716 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 717 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 718 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 719 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 720 v += 36; 721 } 722 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 723 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 724 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 725 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 726 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 727 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 728 idiag += 36; 729 i2 += 6; 730 } 731 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 732 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 733 } 734 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 735 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 736 i2 = 0; 737 mdiag = a->idiag+36*a->mbs; 738 for (i=0; i<m; i++) { 739 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 740 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 741 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 742 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 743 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 744 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 745 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 746 mdiag += 36; 747 i2 += 6; 748 } 749 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 750 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 751 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 752 } 753 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 754 idiag = a->idiag+36*a->mbs - 36; 755 i2 = 6*m - 6; 756 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 757 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 758 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 759 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 760 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 761 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 762 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 763 idiag -= 36; 764 i2 -= 6; 765 for (i=m-2; i>=0; i--) { 766 v = aa + 36*(diag[i]+1); 767 vi = aj + diag[i] + 1; 768 nz = ai[i+1] - diag[i] - 1; 769 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 770 while (nz--) { 771 idx = 6*(*vi++); 772 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 773 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 774 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 775 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 776 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 777 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 778 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 779 v += 36; 780 } 781 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 782 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 783 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 784 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 785 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 786 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 787 idiag -= 36; 788 i2 -= 6; 789 } 790 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 791 } 792 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 793 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 794 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 800 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 801 { 802 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 803 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 804 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 805 const PetscScalar *b; 806 PetscErrorCode ierr; 807 PetscInt m = a->mbs,i,i2,nz,idx; 808 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 809 810 PetscFunctionBegin; 811 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 812 its = its*lits; 813 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 814 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 815 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 816 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 817 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 818 819 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 820 821 if (!m) PetscFunctionReturn(0); 822 diag = a->diag; 823 idiag = a->idiag; 824 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 825 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 826 827 if (flag & SOR_ZERO_INITIAL_GUESS) { 828 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 829 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 830 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 831 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 832 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 833 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 834 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 835 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 836 i2 = 7; 837 idiag += 49; 838 for (i=1; i<m; i++) { 839 v = aa + 49*ai[i]; 840 vi = aj + ai[i]; 841 nz = diag[i] - ai[i]; 842 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 843 while (nz--) { 844 idx = 7*(*vi++); 845 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 846 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 847 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 848 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 849 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 850 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 851 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 852 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 853 v += 49; 854 } 855 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 856 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 857 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 858 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 859 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 860 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 861 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 862 idiag += 49; 863 i2 += 7; 864 } 865 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 866 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 867 } 868 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 869 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 870 i2 = 0; 871 mdiag = a->idiag+49*a->mbs; 872 for (i=0; i<m; i++) { 873 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 874 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 875 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 876 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 877 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 878 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 879 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 880 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 881 mdiag += 49; 882 i2 += 7; 883 } 884 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 885 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 886 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 887 } 888 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 889 idiag = a->idiag+49*a->mbs - 49; 890 i2 = 7*m - 7; 891 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 892 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 893 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 894 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 895 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 896 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 897 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 898 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 899 idiag -= 49; 900 i2 -= 7; 901 for (i=m-2; i>=0; i--) { 902 v = aa + 49*(diag[i]+1); 903 vi = aj + diag[i] + 1; 904 nz = ai[i+1] - diag[i] - 1; 905 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 906 while (nz--) { 907 idx = 7*(*vi++); 908 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 909 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 910 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 911 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 912 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 913 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 914 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 915 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 916 v += 49; 917 } 918 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 919 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 920 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 921 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 922 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 923 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 924 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 925 idiag -= 49; 926 i2 -= 7; 927 } 928 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 929 } 930 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 931 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 932 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 938 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 939 { 940 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 941 PetscScalar *x,*work,*w,*workt; 942 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 943 const PetscScalar *b; 944 PetscErrorCode ierr; 945 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 946 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 947 948 PetscFunctionBegin; 949 its = its*lits; 950 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 951 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 952 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 953 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 954 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 955 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 956 957 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 958 959 if (!m) PetscFunctionReturn(0); 960 diag = a->diag; 961 idiag = a->idiag; 962 if (!a->mult_work) { 963 k = PetscMax(A->rmap->n,A->cmap->n); 964 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 965 } 966 work = a->mult_work; 967 if (!a->sor_work) { 968 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 969 } 970 w = a->sor_work; 971 972 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 973 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 974 975 if (flag & SOR_ZERO_INITIAL_GUESS) { 976 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 977 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 978 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 979 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 980 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 981 i2 = bs; 982 idiag += bs2; 983 for (i=1; i<m; i++) { 984 v = aa + bs2*ai[i]; 985 vi = aj + ai[i]; 986 nz = diag[i] - ai[i]; 987 988 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 989 /* copy all rows of x that are needed into contiguous space */ 990 workt = work; 991 for (j=0; j<nz; j++) { 992 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 993 workt += bs; 994 } 995 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 996 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 997 while (nz--) { 998 idx = N*(*vi++); 999 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1000 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1001 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1002 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1003 v += N2; 1004 } */ 1005 1006 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1007 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1008 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1009 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1010 1011 idiag += bs2; 1012 i2 += bs; 1013 } 1014 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1015 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1016 } 1017 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1018 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1019 i2 = 0; 1020 mdiag = a->idiag+bs2*a->mbs; 1021 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1022 for (i=0; i<m; i++) { 1023 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1024 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1025 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1026 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1027 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1028 1029 mdiag += bs2; 1030 i2 += bs; 1031 } 1032 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1033 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1034 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1035 } 1036 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1037 idiag = a->idiag+bs2*a->mbs - bs2; 1038 i2 = bs*m - bs; 1039 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1040 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1041 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1042 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1043 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1044 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1045 idiag -= bs2; 1046 i2 -= bs; 1047 for (i=m-2; i>=0; i--) { 1048 v = aa + bs2*(diag[i]+1); 1049 vi = aj + diag[i] + 1; 1050 nz = ai[i+1] - diag[i] - 1; 1051 1052 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 /* copy all rows of x that are needed into contiguous space */ 1054 workt = work; 1055 for (j=0; j<nz; j++) { 1056 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1057 workt += bs; 1058 } 1059 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1060 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1061 while (nz--) { 1062 idx = N*(*vi++); 1063 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1064 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1065 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1066 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1067 v += N2; 1068 } */ 1069 1070 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1071 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1072 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1073 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1074 idiag -= bs2; 1075 i2 -= bs; 1076 } 1077 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1078 } 1079 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1080 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1081 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 /* 1086 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1087 */ 1088 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1089 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1090 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1091 #define matsetvaluesblocked4_ matsetvaluesblocked4 1092 #endif 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "matsetvaluesblocked4_" 1096 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1097 { 1098 Mat A = *AA; 1099 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1100 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1101 PetscInt *ai =a->i,*ailen=a->ilen; 1102 PetscInt *aj =a->j,stepval,lastcol = -1; 1103 const PetscScalar *value = v; 1104 MatScalar *ap,*aa = a->a,*bap; 1105 1106 PetscFunctionBegin; 1107 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1108 stepval = (n-1)*4; 1109 for (k=0; k<m; k++) { /* loop over added rows */ 1110 row = im[k]; 1111 rp = aj + ai[row]; 1112 ap = aa + 16*ai[row]; 1113 nrow = ailen[row]; 1114 low = 0; 1115 high = nrow; 1116 for (l=0; l<n; l++) { /* loop over added columns */ 1117 col = in[l]; 1118 if (col <= lastcol) low = 0; 1119 else high = nrow; 1120 lastcol = col; 1121 value = v + k*(stepval+4 + l)*4; 1122 while (high-low > 7) { 1123 t = (low+high)/2; 1124 if (rp[t] > col) high = t; 1125 else low = t; 1126 } 1127 for (i=low; i<high; i++) { 1128 if (rp[i] > col) break; 1129 if (rp[i] == col) { 1130 bap = ap + 16*i; 1131 for (ii=0; ii<4; ii++,value+=stepval) { 1132 for (jj=ii; jj<16; jj+=4) { 1133 bap[jj] += *value++; 1134 } 1135 } 1136 goto noinsert2; 1137 } 1138 } 1139 N = nrow++ - 1; 1140 high++; /* added new column index thus must search to one higher than before */ 1141 /* shift up all the later entries in this row */ 1142 for (ii=N; ii>=i; ii--) { 1143 rp[ii+1] = rp[ii]; 1144 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1145 } 1146 if (N >= i) { 1147 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1148 } 1149 rp[i] = col; 1150 bap = ap + 16*i; 1151 for (ii=0; ii<4; ii++,value+=stepval) { 1152 for (jj=ii; jj<16; jj+=4) { 1153 bap[jj] = *value++; 1154 } 1155 } 1156 noinsert2:; 1157 low = i; 1158 } 1159 ailen[row] = nrow; 1160 } 1161 PetscFunctionReturnVoid(); 1162 } 1163 1164 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1165 #define matsetvalues4_ MATSETVALUES4 1166 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1167 #define matsetvalues4_ matsetvalues4 1168 #endif 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatSetValues4_" 1172 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1173 { 1174 Mat A = *AA; 1175 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1176 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1177 PetscInt *ai=a->i,*ailen=a->ilen; 1178 PetscInt *aj=a->j,brow,bcol; 1179 PetscInt ridx,cidx,lastcol = -1; 1180 MatScalar *ap,value,*aa=a->a,*bap; 1181 1182 PetscFunctionBegin; 1183 for (k=0; k<m; k++) { /* loop over added rows */ 1184 row = im[k]; brow = row/4; 1185 rp = aj + ai[brow]; 1186 ap = aa + 16*ai[brow]; 1187 nrow = ailen[brow]; 1188 low = 0; 1189 high = nrow; 1190 for (l=0; l<n; l++) { /* loop over added columns */ 1191 col = in[l]; bcol = col/4; 1192 ridx = row % 4; cidx = col % 4; 1193 value = v[l + k*n]; 1194 if (col <= lastcol) low = 0; 1195 else high = nrow; 1196 lastcol = col; 1197 while (high-low > 7) { 1198 t = (low+high)/2; 1199 if (rp[t] > bcol) high = t; 1200 else low = t; 1201 } 1202 for (i=low; i<high; i++) { 1203 if (rp[i] > bcol) break; 1204 if (rp[i] == bcol) { 1205 bap = ap + 16*i + 4*cidx + ridx; 1206 *bap += value; 1207 goto noinsert1; 1208 } 1209 } 1210 N = nrow++ - 1; 1211 high++; /* added new column thus must search to one higher than before */ 1212 /* shift up all the later entries in this row */ 1213 for (ii=N; ii>=i; ii--) { 1214 rp[ii+1] = rp[ii]; 1215 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1216 } 1217 if (N>=i) { 1218 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1219 } 1220 rp[i] = bcol; 1221 ap[16*i + 4*cidx + ridx] = value; 1222 noinsert1:; 1223 low = i; 1224 } 1225 ailen[brow] = nrow; 1226 } 1227 PetscFunctionReturnVoid(); 1228 } 1229 1230 /* 1231 Checks for missing diagonals 1232 */ 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1235 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1236 { 1237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1238 PetscErrorCode ierr; 1239 PetscInt *diag,*jj = a->j,i; 1240 1241 PetscFunctionBegin; 1242 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1243 *missing = PETSC_FALSE; 1244 if (A->rmap->n > 0 && !jj) { 1245 *missing = PETSC_TRUE; 1246 if (d) *d = 0; 1247 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1248 } else { 1249 diag = a->diag; 1250 for (i=0; i<a->mbs; i++) { 1251 if (jj[diag[i]] != i) { 1252 *missing = PETSC_TRUE; 1253 if (d) *d = i; 1254 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1255 break; 1256 } 1257 } 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1264 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1265 { 1266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1267 PetscErrorCode ierr; 1268 PetscInt i,j,m = a->mbs; 1269 1270 PetscFunctionBegin; 1271 if (!a->diag) { 1272 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1273 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1274 a->free_diag = PETSC_TRUE; 1275 } 1276 for (i=0; i<m; i++) { 1277 a->diag[i] = a->i[i+1]; 1278 for (j=a->i[i]; j<a->i[i+1]; j++) { 1279 if (a->j[j] == i) { 1280 a->diag[i] = j; 1281 break; 1282 } 1283 } 1284 } 1285 PetscFunctionReturn(0); 1286 } 1287 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1291 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1292 { 1293 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1294 PetscErrorCode ierr; 1295 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1296 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1297 1298 PetscFunctionBegin; 1299 *nn = n; 1300 if (!ia) PetscFunctionReturn(0); 1301 if (symmetric) { 1302 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1303 nz = tia[n]; 1304 } else { 1305 tia = a->i; tja = a->j; 1306 } 1307 1308 if (!blockcompressed && bs > 1) { 1309 (*nn) *= bs; 1310 /* malloc & create the natural set of indices */ 1311 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1312 if (n) { 1313 (*ia)[0] = 0; 1314 for (j=1; j<bs; j++) { 1315 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1316 } 1317 } 1318 1319 for (i=1; i<n; i++) { 1320 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1321 for (j=1; j<bs; j++) { 1322 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1323 } 1324 } 1325 if (n) { 1326 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1327 } 1328 1329 if (inja) { 1330 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1331 cnt = 0; 1332 for (i=0; i<n; i++) { 1333 for (j=0; j<bs; j++) { 1334 for (k=tia[i]; k<tia[i+1]; k++) { 1335 for (l=0; l<bs; l++) { 1336 (*ja)[cnt++] = bs*tja[k] + l; 1337 } 1338 } 1339 } 1340 } 1341 } 1342 1343 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1344 ierr = PetscFree(tia);CHKERRQ(ierr); 1345 ierr = PetscFree(tja);CHKERRQ(ierr); 1346 } 1347 } else if (oshift == 1) { 1348 if (symmetric) { 1349 nz = tia[A->rmap->n/bs]; 1350 /* add 1 to i and j indices */ 1351 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1352 *ia = tia; 1353 if (ja) { 1354 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1355 *ja = tja; 1356 } 1357 } else { 1358 nz = a->i[A->rmap->n/bs]; 1359 /* malloc space and add 1 to i and j indices */ 1360 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1361 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1362 if (ja) { 1363 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1364 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1365 } 1366 } 1367 } else { 1368 *ia = tia; 1369 if (ja) *ja = tja; 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1376 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1377 { 1378 PetscErrorCode ierr; 1379 1380 PetscFunctionBegin; 1381 if (!ia) PetscFunctionReturn(0); 1382 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1383 ierr = PetscFree(*ia);CHKERRQ(ierr); 1384 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1385 } 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1391 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1392 { 1393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 #if defined(PETSC_USE_LOG) 1398 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1399 #endif 1400 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1401 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1402 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1403 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1404 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1405 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1406 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1407 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1408 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1409 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1410 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1411 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1412 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1413 1414 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1415 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1416 ierr = PetscFree(A->data);CHKERRQ(ierr); 1417 1418 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1419 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr); 1420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1421 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1422 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr); 1423 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr); 1424 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr); 1425 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1426 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1427 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr); 1428 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1434 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1435 { 1436 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1437 PetscErrorCode ierr; 1438 1439 PetscFunctionBegin; 1440 switch (op) { 1441 case MAT_ROW_ORIENTED: 1442 a->roworiented = flg; 1443 break; 1444 case MAT_KEEP_NONZERO_PATTERN: 1445 a->keepnonzeropattern = flg; 1446 break; 1447 case MAT_NEW_NONZERO_LOCATIONS: 1448 a->nonew = (flg ? 0 : 1); 1449 break; 1450 case MAT_NEW_NONZERO_LOCATION_ERR: 1451 a->nonew = (flg ? -1 : 0); 1452 break; 1453 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1454 a->nonew = (flg ? -2 : 0); 1455 break; 1456 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1457 a->nounused = (flg ? -1 : 0); 1458 break; 1459 case MAT_CHECK_COMPRESSED_ROW: 1460 a->compressedrow.check = flg; 1461 break; 1462 case MAT_NEW_DIAGONALS: 1463 case MAT_IGNORE_OFF_PROC_ENTRIES: 1464 case MAT_USE_HASH_TABLE: 1465 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1466 break; 1467 case MAT_SPD: 1468 case MAT_SYMMETRIC: 1469 case MAT_STRUCTURALLY_SYMMETRIC: 1470 case MAT_HERMITIAN: 1471 case MAT_SYMMETRY_ETERNAL: 1472 /* These options are handled directly by MatSetOption() */ 1473 break; 1474 default: 1475 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1482 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1483 { 1484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1487 MatScalar *aa,*aa_i; 1488 PetscScalar *v_i; 1489 1490 PetscFunctionBegin; 1491 bs = A->rmap->bs; 1492 ai = a->i; 1493 aj = a->j; 1494 aa = a->a; 1495 bs2 = a->bs2; 1496 1497 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1498 1499 bn = row/bs; /* Block number */ 1500 bp = row % bs; /* Block Position */ 1501 M = ai[bn+1] - ai[bn]; 1502 *nz = bs*M; 1503 1504 if (v) { 1505 *v = 0; 1506 if (*nz) { 1507 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1508 for (i=0; i<M; i++) { /* for each block in the block row */ 1509 v_i = *v + i*bs; 1510 aa_i = aa + bs2*(ai[bn] + i); 1511 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1512 } 1513 } 1514 } 1515 1516 if (idx) { 1517 *idx = 0; 1518 if (*nz) { 1519 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1520 for (i=0; i<M; i++) { /* for each block in the block row */ 1521 idx_i = *idx + i*bs; 1522 itmp = bs*aj[ai[bn] + i]; 1523 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1524 } 1525 } 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1532 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1533 { 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1538 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1539 PetscFunctionReturn(0); 1540 } 1541 1542 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1546 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1547 { 1548 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1549 Mat C; 1550 PetscErrorCode ierr; 1551 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1552 PetscInt *rows,*cols,bs2=a->bs2; 1553 MatScalar *array; 1554 1555 PetscFunctionBegin; 1556 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1557 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1558 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1559 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1560 1561 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1562 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1563 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1564 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1565 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1566 ierr = PetscFree(col);CHKERRQ(ierr); 1567 } else { 1568 C = *B; 1569 } 1570 1571 array = a->a; 1572 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1573 for (i=0; i<mbs; i++) { 1574 cols[0] = i*bs; 1575 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1576 len = ai[i+1] - ai[i]; 1577 for (j=0; j<len; j++) { 1578 rows[0] = (*aj++)*bs; 1579 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1580 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1581 array += bs2; 1582 } 1583 } 1584 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1585 1586 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1587 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1588 1589 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1590 *B = C; 1591 } else { 1592 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1593 } 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1599 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1600 { 1601 PetscErrorCode ierr; 1602 Mat Btrans; 1603 1604 PetscFunctionBegin; 1605 *f = PETSC_FALSE; 1606 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1607 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1608 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1609 PetscFunctionReturn(0); 1610 } 1611 1612 #undef __FUNCT__ 1613 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1614 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1615 { 1616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1617 PetscErrorCode ierr; 1618 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1619 int fd; 1620 PetscScalar *aa; 1621 FILE *file; 1622 1623 PetscFunctionBegin; 1624 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1625 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1626 col_lens[0] = MAT_FILE_CLASSID; 1627 1628 col_lens[1] = A->rmap->N; 1629 col_lens[2] = A->cmap->n; 1630 col_lens[3] = a->nz*bs2; 1631 1632 /* store lengths of each row and write (including header) to file */ 1633 count = 0; 1634 for (i=0; i<a->mbs; i++) { 1635 for (j=0; j<bs; j++) { 1636 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1637 } 1638 } 1639 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1640 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1641 1642 /* store column indices (zero start index) */ 1643 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1644 count = 0; 1645 for (i=0; i<a->mbs; i++) { 1646 for (j=0; j<bs; j++) { 1647 for (k=a->i[i]; k<a->i[i+1]; k++) { 1648 for (l=0; l<bs; l++) { 1649 jj[count++] = bs*a->j[k] + l; 1650 } 1651 } 1652 } 1653 } 1654 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1655 ierr = PetscFree(jj);CHKERRQ(ierr); 1656 1657 /* store nonzero values */ 1658 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1659 count = 0; 1660 for (i=0; i<a->mbs; i++) { 1661 for (j=0; j<bs; j++) { 1662 for (k=a->i[i]; k<a->i[i+1]; k++) { 1663 for (l=0; l<bs; l++) { 1664 aa[count++] = a->a[bs2*k + l*bs + j]; 1665 } 1666 } 1667 } 1668 } 1669 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1670 ierr = PetscFree(aa);CHKERRQ(ierr); 1671 1672 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1673 if (file) { 1674 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1675 } 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1681 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1682 { 1683 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1684 PetscErrorCode ierr; 1685 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1686 PetscViewerFormat format; 1687 1688 PetscFunctionBegin; 1689 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1690 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1691 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1692 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1693 Mat aij; 1694 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1695 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1696 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1697 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1698 PetscFunctionReturn(0); 1699 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1700 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1701 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1702 for (i=0; i<a->mbs; i++) { 1703 for (j=0; j<bs; j++) { 1704 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1705 for (k=a->i[i]; k<a->i[i+1]; k++) { 1706 for (l=0; l<bs; l++) { 1707 #if defined(PETSC_USE_COMPLEX) 1708 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1709 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1710 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1711 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1712 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1713 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1714 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1715 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1716 } 1717 #else 1718 if (a->a[bs2*k + l*bs + j] != 0.0) { 1719 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1720 } 1721 #endif 1722 } 1723 } 1724 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1725 } 1726 } 1727 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1728 } else { 1729 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1730 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1731 for (i=0; i<a->mbs; i++) { 1732 for (j=0; j<bs; j++) { 1733 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1734 for (k=a->i[i]; k<a->i[i+1]; k++) { 1735 for (l=0; l<bs; l++) { 1736 #if defined(PETSC_USE_COMPLEX) 1737 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1738 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1739 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1740 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1741 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1742 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1743 } else { 1744 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1745 } 1746 #else 1747 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1748 #endif 1749 } 1750 } 1751 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1752 } 1753 } 1754 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1755 } 1756 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #include <petscdraw.h> 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1763 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1764 { 1765 Mat A = (Mat) Aa; 1766 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1767 PetscErrorCode ierr; 1768 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1769 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1770 MatScalar *aa; 1771 PetscViewer viewer; 1772 PetscViewerFormat format; 1773 1774 PetscFunctionBegin; 1775 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1776 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1777 1778 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1779 1780 /* loop over matrix elements drawing boxes */ 1781 1782 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1783 color = PETSC_DRAW_BLUE; 1784 for (i=0,row=0; i<mbs; i++,row+=bs) { 1785 for (j=a->i[i]; j<a->i[i+1]; j++) { 1786 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1787 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1788 aa = a->a + j*bs2; 1789 for (k=0; k<bs; k++) { 1790 for (l=0; l<bs; l++) { 1791 if (PetscRealPart(*aa++) >= 0.) continue; 1792 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1793 } 1794 } 1795 } 1796 } 1797 color = PETSC_DRAW_CYAN; 1798 for (i=0,row=0; i<mbs; i++,row+=bs) { 1799 for (j=a->i[i]; j<a->i[i+1]; j++) { 1800 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1801 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1802 aa = a->a + j*bs2; 1803 for (k=0; k<bs; k++) { 1804 for (l=0; l<bs; l++) { 1805 if (PetscRealPart(*aa++) != 0.) continue; 1806 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1807 } 1808 } 1809 } 1810 } 1811 color = PETSC_DRAW_RED; 1812 for (i=0,row=0; i<mbs; i++,row+=bs) { 1813 for (j=a->i[i]; j<a->i[i+1]; j++) { 1814 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1815 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1816 aa = a->a + j*bs2; 1817 for (k=0; k<bs; k++) { 1818 for (l=0; l<bs; l++) { 1819 if (PetscRealPart(*aa++) <= 0.) continue; 1820 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1821 } 1822 } 1823 } 1824 } 1825 } else { 1826 /* use contour shading to indicate magnitude of values */ 1827 /* first determine max of all nonzero values */ 1828 PetscDraw popup; 1829 PetscReal scale,maxv = 0.0; 1830 1831 for (i=0; i<a->nz*a->bs2; i++) { 1832 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1833 } 1834 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1835 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1836 if (popup) { 1837 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1838 } 1839 for (i=0,row=0; i<mbs; i++,row+=bs) { 1840 for (j=a->i[i]; j<a->i[i+1]; j++) { 1841 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1842 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1843 aa = a->a + j*bs2; 1844 for (k=0; k<bs; k++) { 1845 for (l=0; l<bs; l++) { 1846 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1847 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1848 } 1849 } 1850 } 1851 } 1852 } 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1858 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1859 { 1860 PetscErrorCode ierr; 1861 PetscReal xl,yl,xr,yr,w,h; 1862 PetscDraw draw; 1863 PetscBool isnull; 1864 1865 PetscFunctionBegin; 1866 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1867 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1868 1869 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1870 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1871 xr += w; yr += h; xl = -w; yl = -h; 1872 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1873 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1874 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatView_SeqBAIJ" 1880 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1881 { 1882 PetscErrorCode ierr; 1883 PetscBool iascii,isbinary,isdraw; 1884 1885 PetscFunctionBegin; 1886 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1887 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1888 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1889 if (iascii) { 1890 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1891 } else if (isbinary) { 1892 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1893 } else if (isdraw) { 1894 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1895 } else { 1896 Mat B; 1897 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1898 ierr = MatView(B,viewer);CHKERRQ(ierr); 1899 ierr = MatDestroy(&B);CHKERRQ(ierr); 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1907 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1908 { 1909 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1910 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1911 PetscInt *ai = a->i,*ailen = a->ilen; 1912 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1913 MatScalar *ap,*aa = a->a; 1914 1915 PetscFunctionBegin; 1916 for (k=0; k<m; k++) { /* loop over rows */ 1917 row = im[k]; brow = row/bs; 1918 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1919 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1920 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1921 nrow = ailen[brow]; 1922 for (l=0; l<n; l++) { /* loop over columns */ 1923 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1924 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1925 col = in[l]; 1926 bcol = col/bs; 1927 cidx = col%bs; 1928 ridx = row%bs; 1929 high = nrow; 1930 low = 0; /* assume unsorted */ 1931 while (high-low > 5) { 1932 t = (low+high)/2; 1933 if (rp[t] > bcol) high = t; 1934 else low = t; 1935 } 1936 for (i=low; i<high; i++) { 1937 if (rp[i] > bcol) break; 1938 if (rp[i] == bcol) { 1939 *v++ = ap[bs2*i+bs*cidx+ridx]; 1940 goto finished; 1941 } 1942 } 1943 *v++ = 0.0; 1944 finished:; 1945 } 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1952 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1953 { 1954 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1955 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1956 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1957 PetscErrorCode ierr; 1958 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1959 PetscBool roworiented=a->roworiented; 1960 const PetscScalar *value = v; 1961 MatScalar *ap,*aa = a->a,*bap; 1962 1963 PetscFunctionBegin; 1964 if (roworiented) { 1965 stepval = (n-1)*bs; 1966 } else { 1967 stepval = (m-1)*bs; 1968 } 1969 for (k=0; k<m; k++) { /* loop over added rows */ 1970 row = im[k]; 1971 if (row < 0) continue; 1972 #if defined(PETSC_USE_DEBUG) 1973 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1974 #endif 1975 rp = aj + ai[row]; 1976 ap = aa + bs2*ai[row]; 1977 rmax = imax[row]; 1978 nrow = ailen[row]; 1979 low = 0; 1980 high = nrow; 1981 for (l=0; l<n; l++) { /* loop over added columns */ 1982 if (in[l] < 0) continue; 1983 #if defined(PETSC_USE_DEBUG) 1984 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1985 #endif 1986 col = in[l]; 1987 if (roworiented) { 1988 value = v + (k*(stepval+bs) + l)*bs; 1989 } else { 1990 value = v + (l*(stepval+bs) + k)*bs; 1991 } 1992 if (col <= lastcol) low = 0; 1993 else high = nrow; 1994 lastcol = col; 1995 while (high-low > 7) { 1996 t = (low+high)/2; 1997 if (rp[t] > col) high = t; 1998 else low = t; 1999 } 2000 for (i=low; i<high; i++) { 2001 if (rp[i] > col) break; 2002 if (rp[i] == col) { 2003 bap = ap + bs2*i; 2004 if (roworiented) { 2005 if (is == ADD_VALUES) { 2006 for (ii=0; ii<bs; ii++,value+=stepval) { 2007 for (jj=ii; jj<bs2; jj+=bs) { 2008 bap[jj] += *value++; 2009 } 2010 } 2011 } else { 2012 for (ii=0; ii<bs; ii++,value+=stepval) { 2013 for (jj=ii; jj<bs2; jj+=bs) { 2014 bap[jj] = *value++; 2015 } 2016 } 2017 } 2018 } else { 2019 if (is == ADD_VALUES) { 2020 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2021 for (jj=0; jj<bs; jj++) { 2022 bap[jj] += value[jj]; 2023 } 2024 bap += bs; 2025 } 2026 } else { 2027 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2028 for (jj=0; jj<bs; jj++) { 2029 bap[jj] = value[jj]; 2030 } 2031 bap += bs; 2032 } 2033 } 2034 } 2035 goto noinsert2; 2036 } 2037 } 2038 if (nonew == 1) goto noinsert2; 2039 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2040 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2041 N = nrow++ - 1; high++; 2042 /* shift up all the later entries in this row */ 2043 for (ii=N; ii>=i; ii--) { 2044 rp[ii+1] = rp[ii]; 2045 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2046 } 2047 if (N >= i) { 2048 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2049 } 2050 rp[i] = col; 2051 bap = ap + bs2*i; 2052 if (roworiented) { 2053 for (ii=0; ii<bs; ii++,value+=stepval) { 2054 for (jj=ii; jj<bs2; jj+=bs) { 2055 bap[jj] = *value++; 2056 } 2057 } 2058 } else { 2059 for (ii=0; ii<bs; ii++,value+=stepval) { 2060 for (jj=0; jj<bs; jj++) { 2061 *bap++ = *value++; 2062 } 2063 } 2064 } 2065 noinsert2:; 2066 low = i; 2067 } 2068 ailen[row] = nrow; 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2075 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2076 { 2077 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2078 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2079 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2080 PetscErrorCode ierr; 2081 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2082 MatScalar *aa = a->a,*ap; 2083 PetscReal ratio=0.6; 2084 2085 PetscFunctionBegin; 2086 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2087 2088 if (m) rmax = ailen[0]; 2089 for (i=1; i<mbs; i++) { 2090 /* move each row back by the amount of empty slots (fshift) before it*/ 2091 fshift += imax[i-1] - ailen[i-1]; 2092 rmax = PetscMax(rmax,ailen[i]); 2093 if (fshift) { 2094 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2095 N = ailen[i]; 2096 for (j=0; j<N; j++) { 2097 ip[j-fshift] = ip[j]; 2098 2099 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2100 } 2101 } 2102 ai[i] = ai[i-1] + ailen[i-1]; 2103 } 2104 if (mbs) { 2105 fshift += imax[mbs-1] - ailen[mbs-1]; 2106 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2107 } 2108 /* reset ilen and imax for each row */ 2109 for (i=0; i<mbs; i++) { 2110 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2111 } 2112 a->nz = ai[mbs]; 2113 2114 /* diagonals may have moved, so kill the diagonal pointers */ 2115 a->idiagvalid = PETSC_FALSE; 2116 if (fshift && a->diag) { 2117 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2118 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2119 a->diag = 0; 2120 } 2121 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2122 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2123 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2124 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2125 2126 A->info.mallocs += a->reallocs; 2127 a->reallocs = 0; 2128 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2129 2130 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2131 2132 A->same_nonzero = PETSC_TRUE; 2133 PetscFunctionReturn(0); 2134 } 2135 2136 /* 2137 This function returns an array of flags which indicate the locations of contiguous 2138 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2139 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2140 Assume: sizes should be long enough to hold all the values. 2141 */ 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2144 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2145 { 2146 PetscInt i,j,k,row; 2147 PetscBool flg; 2148 2149 PetscFunctionBegin; 2150 for (i=0,j=0; i<n; j++) { 2151 row = idx[i]; 2152 if (row%bs!=0) { /* Not the begining of a block */ 2153 sizes[j] = 1; 2154 i++; 2155 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2156 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2157 i++; 2158 } else { /* Begining of the block, so check if the complete block exists */ 2159 flg = PETSC_TRUE; 2160 for (k=1; k<bs; k++) { 2161 if (row+k != idx[i+k]) { /* break in the block */ 2162 flg = PETSC_FALSE; 2163 break; 2164 } 2165 } 2166 if (flg) { /* No break in the bs */ 2167 sizes[j] = bs; 2168 i += bs; 2169 } else { 2170 sizes[j] = 1; 2171 i++; 2172 } 2173 } 2174 } 2175 *bs_max = j; 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2181 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2182 { 2183 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2184 PetscErrorCode ierr; 2185 PetscInt i,j,k,count,*rows; 2186 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2187 PetscScalar zero = 0.0; 2188 MatScalar *aa; 2189 const PetscScalar *xx; 2190 PetscScalar *bb; 2191 2192 PetscFunctionBegin; 2193 /* fix right hand side if needed */ 2194 if (x && b) { 2195 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2196 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2197 for (i=0; i<is_n; i++) { 2198 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2199 } 2200 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2201 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2202 } 2203 2204 /* Make a copy of the IS and sort it */ 2205 /* allocate memory for rows,sizes */ 2206 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2207 2208 /* copy IS values to rows, and sort them */ 2209 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2210 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2211 2212 if (baij->keepnonzeropattern) { 2213 for (i=0; i<is_n; i++) sizes[i] = 1; 2214 bs_max = is_n; 2215 A->same_nonzero = PETSC_TRUE; 2216 } else { 2217 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2218 2219 A->same_nonzero = PETSC_FALSE; 2220 } 2221 2222 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2223 row = rows[j]; 2224 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2225 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2226 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2227 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2228 if (diag != (PetscScalar)0.0) { 2229 if (baij->ilen[row/bs] > 0) { 2230 baij->ilen[row/bs] = 1; 2231 baij->j[baij->i[row/bs]] = row/bs; 2232 2233 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2234 } 2235 /* Now insert all the diagonal values for this bs */ 2236 for (k=0; k<bs; k++) { 2237 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2238 } 2239 } else { /* (diag == 0.0) */ 2240 baij->ilen[row/bs] = 0; 2241 } /* end (diag == 0.0) */ 2242 } else { /* (sizes[i] != bs) */ 2243 #if defined(PETSC_USE_DEBUG) 2244 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2245 #endif 2246 for (k=0; k<count; k++) { 2247 aa[0] = zero; 2248 aa += bs; 2249 } 2250 if (diag != (PetscScalar)0.0) { 2251 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2252 } 2253 } 2254 } 2255 2256 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2257 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2258 PetscFunctionReturn(0); 2259 } 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2263 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2264 { 2265 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2266 PetscErrorCode ierr; 2267 PetscInt i,j,k,count; 2268 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2269 PetscScalar zero = 0.0; 2270 MatScalar *aa; 2271 const PetscScalar *xx; 2272 PetscScalar *bb; 2273 PetscBool *zeroed,vecs = PETSC_FALSE; 2274 2275 PetscFunctionBegin; 2276 /* fix right hand side if needed */ 2277 if (x && b) { 2278 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2279 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2280 vecs = PETSC_TRUE; 2281 } 2282 A->same_nonzero = PETSC_TRUE; 2283 2284 /* zero the columns */ 2285 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2286 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2287 for (i=0; i<is_n; i++) { 2288 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2289 zeroed[is_idx[i]] = PETSC_TRUE; 2290 } 2291 for (i=0; i<A->rmap->N; i++) { 2292 if (!zeroed[i]) { 2293 row = i/bs; 2294 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2295 for (k=0; k<bs; k++) { 2296 col = bs*baij->j[j] + k; 2297 if (zeroed[col]) { 2298 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2299 if (vecs) bb[i] -= aa[0]*xx[col]; 2300 aa[0] = 0.0; 2301 } 2302 } 2303 } 2304 } else if (vecs) bb[i] = diag*xx[i]; 2305 } 2306 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2307 if (vecs) { 2308 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2309 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2310 } 2311 2312 /* zero the rows */ 2313 for (i=0; i<is_n; i++) { 2314 row = is_idx[i]; 2315 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2316 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2317 for (k=0; k<count; k++) { 2318 aa[0] = zero; 2319 aa += bs; 2320 } 2321 if (diag != (PetscScalar)0.0) { 2322 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2323 } 2324 } 2325 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2331 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2332 { 2333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2334 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2335 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2336 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2337 PetscErrorCode ierr; 2338 PetscInt ridx,cidx,bs2=a->bs2; 2339 PetscBool roworiented=a->roworiented; 2340 MatScalar *ap,value,*aa=a->a,*bap; 2341 2342 PetscFunctionBegin; 2343 if (v) PetscValidScalarPointer(v,6); 2344 for (k=0; k<m; k++) { /* loop over added rows */ 2345 row = im[k]; 2346 brow = row/bs; 2347 if (row < 0) continue; 2348 #if defined(PETSC_USE_DEBUG) 2349 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2350 #endif 2351 rp = aj + ai[brow]; 2352 ap = aa + bs2*ai[brow]; 2353 rmax = imax[brow]; 2354 nrow = ailen[brow]; 2355 low = 0; 2356 high = nrow; 2357 for (l=0; l<n; l++) { /* loop over added columns */ 2358 if (in[l] < 0) continue; 2359 #if defined(PETSC_USE_DEBUG) 2360 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2361 #endif 2362 col = in[l]; bcol = col/bs; 2363 ridx = row % bs; cidx = col % bs; 2364 if (roworiented) { 2365 value = v[l + k*n]; 2366 } else { 2367 value = v[k + l*m]; 2368 } 2369 if (col <= lastcol) low = 0; else high = nrow; 2370 lastcol = col; 2371 while (high-low > 7) { 2372 t = (low+high)/2; 2373 if (rp[t] > bcol) high = t; 2374 else low = t; 2375 } 2376 for (i=low; i<high; i++) { 2377 if (rp[i] > bcol) break; 2378 if (rp[i] == bcol) { 2379 bap = ap + bs2*i + bs*cidx + ridx; 2380 if (is == ADD_VALUES) *bap += value; 2381 else *bap = value; 2382 goto noinsert1; 2383 } 2384 } 2385 if (nonew == 1) goto noinsert1; 2386 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2387 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2388 N = nrow++ - 1; high++; 2389 /* shift up all the later entries in this row */ 2390 for (ii=N; ii>=i; ii--) { 2391 rp[ii+1] = rp[ii]; 2392 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2393 } 2394 if (N>=i) { 2395 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2396 } 2397 rp[i] = bcol; 2398 ap[bs2*i + bs*cidx + ridx] = value; 2399 a->nz++; 2400 noinsert1:; 2401 low = i; 2402 } 2403 ailen[brow] = nrow; 2404 } 2405 A->same_nonzero = PETSC_FALSE; 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2411 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2412 { 2413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2414 Mat outA; 2415 PetscErrorCode ierr; 2416 PetscBool row_identity,col_identity; 2417 2418 PetscFunctionBegin; 2419 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2420 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2421 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2422 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2423 2424 outA = inA; 2425 inA->factortype = MAT_FACTOR_LU; 2426 2427 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2428 2429 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2430 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2431 a->row = row; 2432 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2433 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2434 a->col = col; 2435 2436 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2437 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2438 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2439 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2440 2441 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2442 if (!a->solve_work) { 2443 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2444 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2445 } 2446 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2452 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2453 { 2454 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2455 PetscInt i,nz,mbs; 2456 2457 PetscFunctionBegin; 2458 nz = baij->maxnz; 2459 mbs = baij->mbs; 2460 for (i=0; i<nz; i++) { 2461 baij->j[i] = indices[i]; 2462 } 2463 baij->nz = nz; 2464 for (i=0; i<mbs; i++) { 2465 baij->ilen[i] = baij->imax[i]; 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2472 /*@ 2473 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2474 in the matrix. 2475 2476 Input Parameters: 2477 + mat - the SeqBAIJ matrix 2478 - indices - the column indices 2479 2480 Level: advanced 2481 2482 Notes: 2483 This can be called if you have precomputed the nonzero structure of the 2484 matrix and want to provide it to the matrix object to improve the performance 2485 of the MatSetValues() operation. 2486 2487 You MUST have set the correct numbers of nonzeros per row in the call to 2488 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2489 2490 MUST be called before any calls to MatSetValues(); 2491 2492 @*/ 2493 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2494 { 2495 PetscErrorCode ierr; 2496 2497 PetscFunctionBegin; 2498 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2499 PetscValidPointer(indices,2); 2500 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2506 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2507 { 2508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2509 PetscErrorCode ierr; 2510 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2511 PetscReal atmp; 2512 PetscScalar *x,zero = 0.0; 2513 MatScalar *aa; 2514 PetscInt ncols,brow,krow,kcol; 2515 2516 PetscFunctionBegin; 2517 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2518 bs = A->rmap->bs; 2519 aa = a->a; 2520 ai = a->i; 2521 aj = a->j; 2522 mbs = a->mbs; 2523 2524 ierr = VecSet(v,zero);CHKERRQ(ierr); 2525 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2526 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2527 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2528 for (i=0; i<mbs; i++) { 2529 ncols = ai[1] - ai[0]; ai++; 2530 brow = bs*i; 2531 for (j=0; j<ncols; j++) { 2532 for (kcol=0; kcol<bs; kcol++) { 2533 for (krow=0; krow<bs; krow++) { 2534 atmp = PetscAbsScalar(*aa);aa++; 2535 row = brow + krow; /* row index */ 2536 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2537 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2538 } 2539 } 2540 aj++; 2541 } 2542 } 2543 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatCopy_SeqBAIJ" 2549 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2550 { 2551 PetscErrorCode ierr; 2552 2553 PetscFunctionBegin; 2554 /* If the two matrices have the same copy implementation, use fast copy. */ 2555 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2556 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2557 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2558 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2559 2560 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2561 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2562 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2563 } else { 2564 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2565 } 2566 PetscFunctionReturn(0); 2567 } 2568 2569 #undef __FUNCT__ 2570 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2571 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2572 { 2573 PetscErrorCode ierr; 2574 2575 PetscFunctionBegin; 2576 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 #undef __FUNCT__ 2581 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2582 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2583 { 2584 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2585 2586 PetscFunctionBegin; 2587 *array = a->a; 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2593 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2594 { 2595 PetscFunctionBegin; 2596 PetscFunctionReturn(0); 2597 } 2598 2599 #undef __FUNCT__ 2600 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2601 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2602 { 2603 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2604 PetscErrorCode ierr; 2605 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2606 PetscBLASInt one=1; 2607 2608 PetscFunctionBegin; 2609 if (str == SAME_NONZERO_PATTERN) { 2610 PetscScalar alpha = a; 2611 PetscBLASInt bnz; 2612 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2613 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2614 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2615 if (y->xtoy && y->XtoY != X) { 2616 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2617 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2618 } 2619 if (!y->xtoy) { /* get xtoy */ 2620 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2621 y->XtoY = X; 2622 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2623 } 2624 for (i=0; i<x->nz; i++) { 2625 j = 0; 2626 while (j < bs2) { 2627 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2628 j++; 2629 } 2630 } 2631 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2632 } else { 2633 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2634 } 2635 PetscFunctionReturn(0); 2636 } 2637 2638 #undef __FUNCT__ 2639 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2640 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2641 { 2642 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2643 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2644 MatScalar *aa = a->a; 2645 2646 PetscFunctionBegin; 2647 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2648 PetscFunctionReturn(0); 2649 } 2650 2651 #undef __FUNCT__ 2652 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2653 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2654 { 2655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2656 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2657 MatScalar *aa = a->a; 2658 2659 PetscFunctionBegin; 2660 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2665 2666 #undef __FUNCT__ 2667 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2668 /* 2669 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2670 */ 2671 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2672 { 2673 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2674 PetscErrorCode ierr; 2675 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2676 PetscInt nz = a->i[m],row,*jj,mr,col; 2677 2678 PetscFunctionBegin; 2679 *nn = n; 2680 if (!ia) PetscFunctionReturn(0); 2681 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2682 else { 2683 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2684 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2685 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2686 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2687 jj = a->j; 2688 for (i=0; i<nz; i++) { 2689 collengths[jj[i]]++; 2690 } 2691 cia[0] = oshift; 2692 for (i=0; i<n; i++) { 2693 cia[i+1] = cia[i] + collengths[i]; 2694 } 2695 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2696 jj = a->j; 2697 for (row=0; row<m; row++) { 2698 mr = a->i[row+1] - a->i[row]; 2699 for (i=0; i<mr; i++) { 2700 col = *jj++; 2701 2702 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2703 } 2704 } 2705 ierr = PetscFree(collengths);CHKERRQ(ierr); 2706 *ia = cia; *ja = cja; 2707 } 2708 PetscFunctionReturn(0); 2709 } 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2713 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2714 { 2715 PetscErrorCode ierr; 2716 2717 PetscFunctionBegin; 2718 if (!ia) PetscFunctionReturn(0); 2719 ierr = PetscFree(*ia);CHKERRQ(ierr); 2720 ierr = PetscFree(*ja);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2726 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2727 { 2728 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2729 PetscErrorCode ierr; 2730 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2731 PetscScalar dx,*y,*xx,*w3_array; 2732 PetscScalar *vscale_array; 2733 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2734 Vec w1 = coloring->w1,w2=coloring->w2,w3; 2735 void *fctx = coloring->fctx; 2736 PetscBool flg = PETSC_FALSE; 2737 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2738 Vec x1_tmp; 2739 2740 PetscFunctionBegin; 2741 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2742 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2743 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2744 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2745 2746 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2747 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2748 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 2749 if (flg) { 2750 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2751 } else { 2752 PetscBool assembled; 2753 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2754 if (assembled) { 2755 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2756 } 2757 } 2758 2759 x1_tmp = x1; 2760 if (!coloring->vscale) { 2761 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2762 } 2763 2764 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2765 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2766 } 2767 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2768 2769 /* Set w1 = F(x1) */ 2770 if (!coloring->fset) { 2771 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2772 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2773 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2774 } else { 2775 coloring->fset = PETSC_FALSE; 2776 } 2777 2778 if (!coloring->w3) { 2779 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2780 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2781 } 2782 w3 = coloring->w3; 2783 2784 /* Compute all the local scale factors, including ghost points */ 2785 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2786 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2787 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2788 if (ctype == IS_COLORING_GHOSTED) { 2789 col_start = 0; col_end = N; 2790 } else if (ctype == IS_COLORING_GLOBAL) { 2791 xx = xx - start; 2792 vscale_array = vscale_array - start; 2793 col_start = start; col_end = N + start; 2794 } 2795 for (col=col_start; col<col_end; col++) { 2796 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2797 if (coloring->htype[0] == 'w') { 2798 dx = 1.0 + unorm; 2799 } else { 2800 dx = xx[col]; 2801 } 2802 if (dx == (PetscScalar)0.0) dx = 1.0; 2803 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2804 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2805 dx *= epsilon; 2806 vscale_array[col] = (PetscScalar)1.0/dx; 2807 } 2808 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2809 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2810 if (ctype == IS_COLORING_GLOBAL) { 2811 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2812 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2813 } 2814 if (coloring->vscaleforrow) { 2815 vscaleforrow = coloring->vscaleforrow; 2816 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2817 2818 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2819 /* 2820 Loop over each color 2821 */ 2822 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2823 for (k=0; k<coloring->ncolors; k++) { 2824 coloring->currentcolor = k; 2825 for (i=0; i<bs; i++) { 2826 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2827 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2828 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2829 /* 2830 Loop over each column associated with color 2831 adding the perturbation to the vector w3. 2832 */ 2833 for (l=0; l<coloring->ncolumns[k]; l++) { 2834 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2835 if (coloring->htype[0] == 'w') { 2836 dx = 1.0 + unorm; 2837 } else { 2838 dx = xx[col]; 2839 } 2840 if (dx == (PetscScalar)0.0) dx = 1.0; 2841 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2842 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2843 dx *= epsilon; 2844 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2845 w3_array[col] += dx; 2846 } 2847 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2848 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2849 2850 /* 2851 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2852 w2 = F(x1 + dx) - F(x1) 2853 */ 2854 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2855 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2856 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2857 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2858 2859 /* 2860 Loop over rows of vector, putting results into Jacobian matrix 2861 */ 2862 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2863 for (l=0; l<coloring->nrows[k]; l++) { 2864 row = bs*coloring->rows[k][l]; /* local row index */ 2865 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2866 for (j=0; j<bs; j++) { 2867 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2868 srows[j] = row + start + j; 2869 } 2870 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2871 } 2872 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2873 } 2874 } /* endof for each color */ 2875 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2876 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2877 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2878 ierr = PetscFree(srows);CHKERRQ(ierr); 2879 2880 coloring->currentcolor = -1; 2881 2882 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2883 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2884 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2885 PetscFunctionReturn(0); 2886 } 2887 2888 /* -------------------------------------------------------------------*/ 2889 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2890 MatGetRow_SeqBAIJ, 2891 MatRestoreRow_SeqBAIJ, 2892 MatMult_SeqBAIJ_N, 2893 /* 4*/ MatMultAdd_SeqBAIJ_N, 2894 MatMultTranspose_SeqBAIJ, 2895 MatMultTransposeAdd_SeqBAIJ, 2896 0, 2897 0, 2898 0, 2899 /* 10*/ 0, 2900 MatLUFactor_SeqBAIJ, 2901 0, 2902 0, 2903 MatTranspose_SeqBAIJ, 2904 /* 15*/ MatGetInfo_SeqBAIJ, 2905 MatEqual_SeqBAIJ, 2906 MatGetDiagonal_SeqBAIJ, 2907 MatDiagonalScale_SeqBAIJ, 2908 MatNorm_SeqBAIJ, 2909 /* 20*/ 0, 2910 MatAssemblyEnd_SeqBAIJ, 2911 MatSetOption_SeqBAIJ, 2912 MatZeroEntries_SeqBAIJ, 2913 /* 24*/ MatZeroRows_SeqBAIJ, 2914 0, 2915 0, 2916 0, 2917 0, 2918 /* 29*/ MatSetUp_SeqBAIJ, 2919 0, 2920 0, 2921 0, 2922 0, 2923 /* 34*/ MatDuplicate_SeqBAIJ, 2924 0, 2925 0, 2926 MatILUFactor_SeqBAIJ, 2927 0, 2928 /* 39*/ MatAXPY_SeqBAIJ, 2929 MatGetSubMatrices_SeqBAIJ, 2930 MatIncreaseOverlap_SeqBAIJ, 2931 MatGetValues_SeqBAIJ, 2932 MatCopy_SeqBAIJ, 2933 /* 44*/ 0, 2934 MatScale_SeqBAIJ, 2935 0, 2936 0, 2937 MatZeroRowsColumns_SeqBAIJ, 2938 /* 49*/ 0, 2939 MatGetRowIJ_SeqBAIJ, 2940 MatRestoreRowIJ_SeqBAIJ, 2941 MatGetColumnIJ_SeqBAIJ, 2942 MatRestoreColumnIJ_SeqBAIJ, 2943 /* 54*/ MatFDColoringCreate_SeqAIJ, 2944 0, 2945 0, 2946 0, 2947 MatSetValuesBlocked_SeqBAIJ, 2948 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2949 MatDestroy_SeqBAIJ, 2950 MatView_SeqBAIJ, 2951 0, 2952 0, 2953 /* 64*/ 0, 2954 0, 2955 0, 2956 0, 2957 0, 2958 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2959 0, 2960 MatConvert_Basic, 2961 0, 2962 0, 2963 /* 74*/ 0, 2964 MatFDColoringApply_BAIJ, 2965 0, 2966 0, 2967 0, 2968 /* 79*/ 0, 2969 0, 2970 0, 2971 0, 2972 MatLoad_SeqBAIJ, 2973 /* 84*/ 0, 2974 0, 2975 0, 2976 0, 2977 0, 2978 /* 89*/ 0, 2979 0, 2980 0, 2981 0, 2982 0, 2983 /* 94*/ 0, 2984 0, 2985 0, 2986 0, 2987 0, 2988 /* 99*/ 0, 2989 0, 2990 0, 2991 0, 2992 0, 2993 /*104*/ 0, 2994 MatRealPart_SeqBAIJ, 2995 MatImaginaryPart_SeqBAIJ, 2996 0, 2997 0, 2998 /*109*/ 0, 2999 0, 3000 0, 3001 0, 3002 MatMissingDiagonal_SeqBAIJ, 3003 /*114*/ 0, 3004 0, 3005 0, 3006 0, 3007 0, 3008 /*119*/ 0, 3009 0, 3010 MatMultHermitianTranspose_SeqBAIJ, 3011 MatMultHermitianTransposeAdd_SeqBAIJ, 3012 0, 3013 /*124*/ 0, 3014 0, 3015 MatInvertBlockDiagonal_SeqBAIJ, 3016 0, 3017 0, 3018 /*129*/ 0, 3019 0, 3020 0, 3021 0, 3022 0, 3023 /*134*/ 0, 3024 0, 3025 0, 3026 0, 3027 0, 3028 /*139*/ 0, 3029 0 3030 }; 3031 3032 #undef __FUNCT__ 3033 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3034 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3035 { 3036 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3037 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3038 PetscErrorCode ierr; 3039 3040 PetscFunctionBegin; 3041 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3042 3043 /* allocate space for values if not already there */ 3044 if (!aij->saved_values) { 3045 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3046 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3047 } 3048 3049 /* copy values over */ 3050 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3056 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3057 { 3058 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3059 PetscErrorCode ierr; 3060 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3061 3062 PetscFunctionBegin; 3063 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3064 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3065 3066 /* copy values over */ 3067 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3072 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3073 3074 #undef __FUNCT__ 3075 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3076 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3077 { 3078 Mat_SeqBAIJ *b; 3079 PetscErrorCode ierr; 3080 PetscInt i,mbs,nbs,bs2; 3081 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3082 3083 PetscFunctionBegin; 3084 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3085 if (nz == MAT_SKIP_ALLOCATION) { 3086 skipallocation = PETSC_TRUE; 3087 nz = 0; 3088 } 3089 3090 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3091 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3092 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3093 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3094 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3095 3096 B->preallocated = PETSC_TRUE; 3097 3098 mbs = B->rmap->n/bs; 3099 nbs = B->cmap->n/bs; 3100 bs2 = bs*bs; 3101 3102 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3103 3104 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3105 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3106 if (nnz) { 3107 for (i=0; i<mbs; i++) { 3108 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3109 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3110 } 3111 } 3112 3113 b = (Mat_SeqBAIJ*)B->data; 3114 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3115 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3116 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3117 3118 if (!flg) { 3119 switch (bs) { 3120 case 1: 3121 B->ops->mult = MatMult_SeqBAIJ_1; 3122 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3123 B->ops->sor = MatSOR_SeqBAIJ_1; 3124 break; 3125 case 2: 3126 B->ops->mult = MatMult_SeqBAIJ_2; 3127 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3128 B->ops->sor = MatSOR_SeqBAIJ_2; 3129 break; 3130 case 3: 3131 B->ops->mult = MatMult_SeqBAIJ_3; 3132 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3133 B->ops->sor = MatSOR_SeqBAIJ_3; 3134 break; 3135 case 4: 3136 B->ops->mult = MatMult_SeqBAIJ_4; 3137 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3138 B->ops->sor = MatSOR_SeqBAIJ_4; 3139 break; 3140 case 5: 3141 B->ops->mult = MatMult_SeqBAIJ_5; 3142 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3143 B->ops->sor = MatSOR_SeqBAIJ_5; 3144 break; 3145 case 6: 3146 B->ops->mult = MatMult_SeqBAIJ_6; 3147 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3148 B->ops->sor = MatSOR_SeqBAIJ_6; 3149 break; 3150 case 7: 3151 B->ops->mult = MatMult_SeqBAIJ_7; 3152 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3153 B->ops->sor = MatSOR_SeqBAIJ_7; 3154 break; 3155 case 15: 3156 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3157 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3158 B->ops->sor = MatSOR_SeqBAIJ_N; 3159 break; 3160 default: 3161 B->ops->mult = MatMult_SeqBAIJ_N; 3162 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3163 B->ops->sor = MatSOR_SeqBAIJ_N; 3164 break; 3165 } 3166 } 3167 b->mbs = mbs; 3168 b->nbs = nbs; 3169 if (!skipallocation) { 3170 if (!b->imax) { 3171 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3172 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3173 3174 b->free_imax_ilen = PETSC_TRUE; 3175 } 3176 /* b->ilen will count nonzeros in each block row so far. */ 3177 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3178 if (!nnz) { 3179 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3180 else if (nz < 0) nz = 1; 3181 for (i=0; i<mbs; i++) b->imax[i] = nz; 3182 nz = nz*mbs; 3183 } else { 3184 nz = 0; 3185 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3186 } 3187 3188 /* allocate the matrix space */ 3189 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3190 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3191 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3192 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3193 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3194 3195 b->singlemalloc = PETSC_TRUE; 3196 b->i[0] = 0; 3197 for (i=1; i<mbs+1; i++) { 3198 b->i[i] = b->i[i-1] + b->imax[i-1]; 3199 } 3200 b->free_a = PETSC_TRUE; 3201 b->free_ij = PETSC_TRUE; 3202 } else { 3203 b->free_a = PETSC_FALSE; 3204 b->free_ij = PETSC_FALSE; 3205 } 3206 3207 b->bs2 = bs2; 3208 b->mbs = mbs; 3209 b->nz = 0; 3210 b->maxnz = nz; 3211 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3212 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3213 PetscFunctionReturn(0); 3214 } 3215 3216 #undef __FUNCT__ 3217 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3218 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3219 { 3220 PetscInt i,m,nz,nz_max=0,*nnz; 3221 PetscScalar *values=0; 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3226 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3227 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3228 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3229 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3230 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3231 m = B->rmap->n/bs; 3232 3233 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3234 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3235 for (i=0; i<m; i++) { 3236 nz = ii[i+1]- ii[i]; 3237 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3238 nz_max = PetscMax(nz_max, nz); 3239 nnz[i] = nz; 3240 } 3241 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3242 ierr = PetscFree(nnz);CHKERRQ(ierr); 3243 3244 values = (PetscScalar*)V; 3245 if (!values) { 3246 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3247 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3248 } 3249 for (i=0; i<m; i++) { 3250 PetscInt ncols = ii[i+1] - ii[i]; 3251 const PetscInt *icols = jj + ii[i]; 3252 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3253 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3254 } 3255 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3256 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3257 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3258 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3259 PetscFunctionReturn(0); 3260 } 3261 3262 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3263 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3264 #if defined(PETSC_HAVE_MUMPS) 3265 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3266 #endif 3267 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 3268 3269 /*MC 3270 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3271 block sparse compressed row format. 3272 3273 Options Database Keys: 3274 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3275 3276 Level: beginner 3277 3278 .seealso: MatCreateSeqBAIJ() 3279 M*/ 3280 3281 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3282 3283 #undef __FUNCT__ 3284 #define __FUNCT__ "MatCreate_SeqBAIJ" 3285 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3286 { 3287 PetscErrorCode ierr; 3288 PetscMPIInt size; 3289 Mat_SeqBAIJ *b; 3290 3291 PetscFunctionBegin; 3292 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3293 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3294 3295 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3296 B->data = (void*)b; 3297 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3298 3299 b->row = 0; 3300 b->col = 0; 3301 b->icol = 0; 3302 b->reallocs = 0; 3303 b->saved_values = 0; 3304 3305 b->roworiented = PETSC_TRUE; 3306 b->nonew = 0; 3307 b->diag = 0; 3308 b->solve_work = 0; 3309 b->mult_work = 0; 3310 B->spptr = 0; 3311 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3312 b->keepnonzeropattern = PETSC_FALSE; 3313 b->xtoy = 0; 3314 b->XtoY = 0; 3315 B->same_nonzero = PETSC_FALSE; 3316 3317 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3318 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3319 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3320 #if defined(PETSC_HAVE_MUMPS) 3321 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3322 #endif 3323 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3324 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3325 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3326 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3327 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3328 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3329 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3330 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3331 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3332 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3333 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3334 PetscFunctionReturn(0); 3335 } 3336 3337 #undef __FUNCT__ 3338 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3339 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3340 { 3341 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3342 PetscErrorCode ierr; 3343 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3344 3345 PetscFunctionBegin; 3346 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3347 3348 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3349 c->imax = a->imax; 3350 c->ilen = a->ilen; 3351 c->free_imax_ilen = PETSC_FALSE; 3352 } else { 3353 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3354 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3355 for (i=0; i<mbs; i++) { 3356 c->imax[i] = a->imax[i]; 3357 c->ilen[i] = a->ilen[i]; 3358 } 3359 c->free_imax_ilen = PETSC_TRUE; 3360 } 3361 3362 /* allocate the matrix space */ 3363 if (mallocmatspace) { 3364 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3365 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3366 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3367 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3368 3369 c->i = a->i; 3370 c->j = a->j; 3371 c->singlemalloc = PETSC_FALSE; 3372 c->free_a = PETSC_TRUE; 3373 c->free_ij = PETSC_FALSE; 3374 c->parent = A; 3375 C->preallocated = PETSC_TRUE; 3376 C->assembled = PETSC_TRUE; 3377 3378 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3379 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3380 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3381 } else { 3382 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3383 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3384 3385 c->singlemalloc = PETSC_TRUE; 3386 c->free_a = PETSC_TRUE; 3387 c->free_ij = PETSC_TRUE; 3388 3389 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3390 if (mbs > 0) { 3391 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3392 if (cpvalues == MAT_COPY_VALUES) { 3393 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3394 } else { 3395 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3396 } 3397 } 3398 C->preallocated = PETSC_TRUE; 3399 C->assembled = PETSC_TRUE; 3400 } 3401 } 3402 3403 c->roworiented = a->roworiented; 3404 c->nonew = a->nonew; 3405 3406 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3407 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3408 3409 c->bs2 = a->bs2; 3410 c->mbs = a->mbs; 3411 c->nbs = a->nbs; 3412 3413 if (a->diag) { 3414 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3415 c->diag = a->diag; 3416 c->free_diag = PETSC_FALSE; 3417 } else { 3418 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3419 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3420 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3421 c->free_diag = PETSC_TRUE; 3422 } 3423 } else c->diag = 0; 3424 3425 c->nz = a->nz; 3426 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3427 c->solve_work = 0; 3428 c->mult_work = 0; 3429 3430 c->compressedrow.use = a->compressedrow.use; 3431 c->compressedrow.nrows = a->compressedrow.nrows; 3432 c->compressedrow.check = a->compressedrow.check; 3433 if (a->compressedrow.use) { 3434 i = a->compressedrow.nrows; 3435 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3436 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3437 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3438 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3439 } else { 3440 c->compressedrow.use = PETSC_FALSE; 3441 c->compressedrow.i = NULL; 3442 c->compressedrow.rindex = NULL; 3443 } 3444 C->same_nonzero = A->same_nonzero; 3445 3446 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3447 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3448 PetscFunctionReturn(0); 3449 } 3450 3451 #undef __FUNCT__ 3452 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3453 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3454 { 3455 PetscErrorCode ierr; 3456 3457 PetscFunctionBegin; 3458 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3459 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3460 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3461 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3462 PetscFunctionReturn(0); 3463 } 3464 3465 #undef __FUNCT__ 3466 #define __FUNCT__ "MatLoad_SeqBAIJ" 3467 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3468 { 3469 Mat_SeqBAIJ *a; 3470 PetscErrorCode ierr; 3471 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3472 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3473 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3474 PetscInt *masked,nmask,tmp,bs2,ishift; 3475 PetscMPIInt size; 3476 int fd; 3477 PetscScalar *aa; 3478 MPI_Comm comm; 3479 3480 PetscFunctionBegin; 3481 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3482 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3483 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3484 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3485 bs2 = bs*bs; 3486 3487 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3488 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3489 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3490 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3491 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3492 M = header[1]; N = header[2]; nz = header[3]; 3493 3494 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3495 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3496 3497 /* 3498 This code adds extra rows to make sure the number of rows is 3499 divisible by the blocksize 3500 */ 3501 mbs = M/bs; 3502 extra_rows = bs - M + bs*(mbs); 3503 if (extra_rows == bs) extra_rows = 0; 3504 else mbs++; 3505 if (extra_rows) { 3506 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3507 } 3508 3509 /* Set global sizes if not already set */ 3510 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3511 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3512 } else { /* Check if the matrix global sizes are correct */ 3513 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3514 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3515 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3516 } 3517 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3518 } 3519 3520 /* read in row lengths */ 3521 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3522 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3523 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3524 3525 /* read in column indices */ 3526 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3527 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3528 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3529 3530 /* loop over row lengths determining block row lengths */ 3531 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3532 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3533 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3534 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3535 rowcount = 0; 3536 nzcount = 0; 3537 for (i=0; i<mbs; i++) { 3538 nmask = 0; 3539 for (j=0; j<bs; j++) { 3540 kmax = rowlengths[rowcount]; 3541 for (k=0; k<kmax; k++) { 3542 tmp = jj[nzcount++]/bs; 3543 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3544 } 3545 rowcount++; 3546 } 3547 browlengths[i] += nmask; 3548 /* zero out the mask elements we set */ 3549 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3550 } 3551 3552 /* Do preallocation */ 3553 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3554 a = (Mat_SeqBAIJ*)newmat->data; 3555 3556 /* set matrix "i" values */ 3557 a->i[0] = 0; 3558 for (i=1; i<= mbs; i++) { 3559 a->i[i] = a->i[i-1] + browlengths[i-1]; 3560 a->ilen[i-1] = browlengths[i-1]; 3561 } 3562 a->nz = 0; 3563 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3564 3565 /* read in nonzero values */ 3566 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3567 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3568 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3569 3570 /* set "a" and "j" values into matrix */ 3571 nzcount = 0; jcount = 0; 3572 for (i=0; i<mbs; i++) { 3573 nzcountb = nzcount; 3574 nmask = 0; 3575 for (j=0; j<bs; j++) { 3576 kmax = rowlengths[i*bs+j]; 3577 for (k=0; k<kmax; k++) { 3578 tmp = jj[nzcount++]/bs; 3579 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3580 } 3581 } 3582 /* sort the masked values */ 3583 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3584 3585 /* set "j" values into matrix */ 3586 maskcount = 1; 3587 for (j=0; j<nmask; j++) { 3588 a->j[jcount++] = masked[j]; 3589 mask[masked[j]] = maskcount++; 3590 } 3591 /* set "a" values into matrix */ 3592 ishift = bs2*a->i[i]; 3593 for (j=0; j<bs; j++) { 3594 kmax = rowlengths[i*bs+j]; 3595 for (k=0; k<kmax; k++) { 3596 tmp = jj[nzcountb]/bs; 3597 block = mask[tmp] - 1; 3598 point = jj[nzcountb] - bs*tmp; 3599 idx = ishift + bs2*block + j + bs*point; 3600 a->a[idx] = (MatScalar)aa[nzcountb++]; 3601 } 3602 } 3603 /* zero out the mask elements we set */ 3604 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3605 } 3606 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3607 3608 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3609 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3610 ierr = PetscFree(aa);CHKERRQ(ierr); 3611 ierr = PetscFree(jj);CHKERRQ(ierr); 3612 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3613 3614 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3615 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3616 PetscFunctionReturn(0); 3617 } 3618 3619 #undef __FUNCT__ 3620 #define __FUNCT__ "MatCreateSeqBAIJ" 3621 /*@C 3622 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3623 compressed row) format. For good matrix assembly performance the 3624 user should preallocate the matrix storage by setting the parameter nz 3625 (or the array nnz). By setting these parameters accurately, performance 3626 during matrix assembly can be increased by more than a factor of 50. 3627 3628 Collective on MPI_Comm 3629 3630 Input Parameters: 3631 + comm - MPI communicator, set to PETSC_COMM_SELF 3632 . bs - size of block 3633 . m - number of rows 3634 . n - number of columns 3635 . nz - number of nonzero blocks per block row (same for all rows) 3636 - nnz - array containing the number of nonzero blocks in the various block rows 3637 (possibly different for each block row) or NULL 3638 3639 Output Parameter: 3640 . A - the matrix 3641 3642 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3643 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3644 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3645 3646 Options Database Keys: 3647 . -mat_no_unroll - uses code that does not unroll the loops in the 3648 block calculations (much slower) 3649 . -mat_block_size - size of the blocks to use 3650 3651 Level: intermediate 3652 3653 Notes: 3654 The number of rows and columns must be divisible by blocksize. 3655 3656 If the nnz parameter is given then the nz parameter is ignored 3657 3658 A nonzero block is any block that as 1 or more nonzeros in it 3659 3660 The block AIJ format is fully compatible with standard Fortran 77 3661 storage. That is, the stored row and column indices can begin at 3662 either one (as in Fortran) or zero. See the users' manual for details. 3663 3664 Specify the preallocated storage with either nz or nnz (not both). 3665 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3666 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3667 matrices. 3668 3669 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3670 @*/ 3671 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3672 { 3673 PetscErrorCode ierr; 3674 3675 PetscFunctionBegin; 3676 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3677 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3678 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3679 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3680 PetscFunctionReturn(0); 3681 } 3682 3683 #undef __FUNCT__ 3684 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3685 /*@C 3686 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3687 per row in the matrix. For good matrix assembly performance the 3688 user should preallocate the matrix storage by setting the parameter nz 3689 (or the array nnz). By setting these parameters accurately, performance 3690 during matrix assembly can be increased by more than a factor of 50. 3691 3692 Collective on MPI_Comm 3693 3694 Input Parameters: 3695 + A - the matrix 3696 . bs - size of block 3697 . nz - number of block nonzeros per block row (same for all rows) 3698 - nnz - array containing the number of block nonzeros in the various block rows 3699 (possibly different for each block row) or NULL 3700 3701 Options Database Keys: 3702 . -mat_no_unroll - uses code that does not unroll the loops in the 3703 block calculations (much slower) 3704 . -mat_block_size - size of the blocks to use 3705 3706 Level: intermediate 3707 3708 Notes: 3709 If the nnz parameter is given then the nz parameter is ignored 3710 3711 You can call MatGetInfo() to get information on how effective the preallocation was; 3712 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3713 You can also run with the option -info and look for messages with the string 3714 malloc in them to see if additional memory allocation was needed. 3715 3716 The block AIJ format is fully compatible with standard Fortran 77 3717 storage. That is, the stored row and column indices can begin at 3718 either one (as in Fortran) or zero. See the users' manual for details. 3719 3720 Specify the preallocated storage with either nz or nnz (not both). 3721 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3722 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3723 3724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3725 @*/ 3726 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3727 { 3728 PetscErrorCode ierr; 3729 3730 PetscFunctionBegin; 3731 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3732 PetscValidType(B,1); 3733 PetscValidLogicalCollectiveInt(B,bs,2); 3734 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3735 PetscFunctionReturn(0); 3736 } 3737 3738 #undef __FUNCT__ 3739 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3740 /*@C 3741 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3742 (the default sequential PETSc format). 3743 3744 Collective on MPI_Comm 3745 3746 Input Parameters: 3747 + A - the matrix 3748 . i - the indices into j for the start of each local row (starts with zero) 3749 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3750 - v - optional values in the matrix 3751 3752 Level: developer 3753 3754 .keywords: matrix, aij, compressed row, sparse 3755 3756 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3757 @*/ 3758 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3759 { 3760 PetscErrorCode ierr; 3761 3762 PetscFunctionBegin; 3763 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3764 PetscValidType(B,1); 3765 PetscValidLogicalCollectiveInt(B,bs,2); 3766 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3767 PetscFunctionReturn(0); 3768 } 3769 3770 3771 #undef __FUNCT__ 3772 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3773 /*@ 3774 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3775 3776 Collective on MPI_Comm 3777 3778 Input Parameters: 3779 + comm - must be an MPI communicator of size 1 3780 . bs - size of block 3781 . m - number of rows 3782 . n - number of columns 3783 . i - row indices 3784 . j - column indices 3785 - a - matrix values 3786 3787 Output Parameter: 3788 . mat - the matrix 3789 3790 Level: advanced 3791 3792 Notes: 3793 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3794 once the matrix is destroyed 3795 3796 You cannot set new nonzero locations into this matrix, that will generate an error. 3797 3798 The i and j indices are 0 based 3799 3800 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3801 3802 3803 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3804 3805 @*/ 3806 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3807 { 3808 PetscErrorCode ierr; 3809 PetscInt ii; 3810 Mat_SeqBAIJ *baij; 3811 3812 PetscFunctionBegin; 3813 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3814 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3815 3816 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3817 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3818 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3819 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3820 baij = (Mat_SeqBAIJ*)(*mat)->data; 3821 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3822 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3823 3824 baij->i = i; 3825 baij->j = j; 3826 baij->a = a; 3827 3828 baij->singlemalloc = PETSC_FALSE; 3829 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3830 baij->free_a = PETSC_FALSE; 3831 baij->free_ij = PETSC_FALSE; 3832 3833 for (ii=0; ii<m; ii++) { 3834 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3835 #if defined(PETSC_USE_DEBUG) 3836 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3837 #endif 3838 } 3839 #if defined(PETSC_USE_DEBUG) 3840 for (ii=0; ii<baij->i[m]; ii++) { 3841 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3842 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3843 } 3844 #endif 3845 3846 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3847 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3848 PetscFunctionReturn(0); 3849 } 3850