1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,s1,s2; 224 const PetscScalar *b; 225 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 232 its = its*lits; 233 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 248 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 250 i2 = 2; 251 idiag += 4; 252 for (i=1; i<m; i++) { 253 v = aa + 4*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; 257 for (j=0; j<nz; j++) { 258 idx = 2*vi[j]; 259 it = 4*j; 260 x1 = x[idx]; x2 = x[1+idx]; 261 s1 -= v[it]*x1 + v[it+2]*x2; 262 s2 -= v[it+1]*x1 + v[it+3]*x2; 263 } 264 x[i2] = idiag[0]*s1 + idiag[2]*s2; 265 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 266 idiag += 4; 267 i2 += 2; 268 } 269 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 270 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 271 } 272 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 273 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 274 i2 = 0; 275 mdiag = a->idiag+4*a->mbs; 276 for (i=0; i<m; i++) { 277 x1 = x[i2]; x2 = x[i2+1]; 278 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 279 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 280 mdiag += 4; 281 i2 += 2; 282 } 283 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 284 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 286 } 287 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 idiag = a->idiag+4*a->mbs - 4; 289 i2 = 2*m - 2; 290 x1 = x[i2]; x2 = x[i2+1]; 291 x[i2] = idiag[0]*x1 + idiag[2]*x2; 292 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 293 idiag -= 4; 294 i2 -= 2; 295 for (i=m-2; i>=0; i--) { 296 v = aa + 4*(diag[i]+1); 297 vi = aj + diag[i] + 1; 298 nz = ai[i+1] - diag[i] - 1; 299 s1 = x[i2]; s2 = x[i2+1]; 300 for (j=0; j<nz; j++) { 301 idx = 2*vi[j]; 302 it = 4*j; 303 x1 = x[idx]; x2 = x[1+idx]; 304 s1 -= v[it]*x1 + v[it+2]*x2; 305 s2 -= v[it+1]*x1 + v[it+3]*x2; 306 } 307 x[i2] = idiag[0]*s1 + idiag[2]*s2; 308 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 309 idiag -= 4; 310 i2 -= 2; 311 } 312 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 313 } 314 } else { 315 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 316 } 317 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 PetscScalar *x,x1,x2,x3,s1,s2,s3; 328 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 329 const PetscScalar *b; 330 PetscErrorCode ierr; 331 PetscInt m = a->mbs,i,i2,nz,idx; 332 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 333 334 PetscFunctionBegin; 335 its = its*lits; 336 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 337 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 338 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 339 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 340 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 341 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 342 343 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 344 345 diag = a->diag; 346 idiag = a->idiag; 347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 349 350 if (flag & SOR_ZERO_INITIAL_GUESS) { 351 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 352 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 353 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 354 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 355 i2 = 3; 356 idiag += 9; 357 for (i=1; i<m; i++) { 358 v = aa + 9*ai[i]; 359 vi = aj + ai[i]; 360 nz = diag[i] - ai[i]; 361 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 362 while (nz--) { 363 idx = 3*(*vi++); 364 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 365 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 366 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 367 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 368 v += 9; 369 } 370 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 371 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 372 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 373 idiag += 9; 374 i2 += 3; 375 } 376 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 377 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 378 } 379 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 380 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 381 i2 = 0; 382 mdiag = a->idiag+9*a->mbs; 383 for (i=0; i<m; i++) { 384 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 385 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 386 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 387 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 388 mdiag += 9; 389 i2 += 3; 390 } 391 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 392 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 393 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 394 } 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 396 idiag = a->idiag+9*a->mbs - 9; 397 i2 = 3*m - 3; 398 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 399 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 400 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 401 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 402 idiag -= 9; 403 i2 -= 3; 404 for (i=m-2; i>=0; i--) { 405 v = aa + 9*(diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 409 while (nz--) { 410 idx = 3*(*vi++); 411 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 412 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 413 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 414 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 415 v += 9; 416 } 417 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 418 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 419 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 420 idiag -= 9; 421 i2 -= 3; 422 } 423 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 424 } 425 } else { 426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 427 } 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 439 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 440 const PetscScalar *b; 441 PetscErrorCode ierr; 442 PetscInt m = a->mbs,i,i2,nz,idx; 443 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 444 445 PetscFunctionBegin; 446 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 447 its = its*lits; 448 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 449 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 450 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 451 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 452 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 453 454 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 455 456 diag = a->diag; 457 idiag = a->idiag; 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 460 461 if (flag & SOR_ZERO_INITIAL_GUESS) { 462 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 463 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 464 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 465 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 466 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 467 i2 = 4; 468 idiag += 16; 469 for (i=1; i<m; i++) { 470 v = aa + 16*ai[i]; 471 vi = aj + ai[i]; 472 nz = diag[i] - ai[i]; 473 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 474 while (nz--) { 475 idx = 4*(*vi++); 476 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 477 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 478 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 479 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 480 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 481 v += 16; 482 } 483 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 484 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 485 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 486 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 487 idiag += 16; 488 i2 += 4; 489 } 490 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 491 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 492 } 493 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 494 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 495 i2 = 0; 496 mdiag = a->idiag+16*a->mbs; 497 for (i=0; i<m; i++) { 498 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 499 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 500 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 501 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 502 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 503 mdiag += 16; 504 i2 += 4; 505 } 506 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 507 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 508 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 509 } 510 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 511 idiag = a->idiag+16*a->mbs - 16; 512 i2 = 4*m - 4; 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 515 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 516 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 517 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 518 idiag -= 16; 519 i2 -= 4; 520 for (i=m-2; i>=0; i--) { 521 v = aa + 16*(diag[i]+1); 522 vi = aj + diag[i] + 1; 523 nz = ai[i+1] - diag[i] - 1; 524 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 525 while (nz--) { 526 idx = 4*(*vi++); 527 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 528 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 529 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 530 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 531 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 532 v += 16; 533 } 534 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 535 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 536 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 537 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 538 idiag -= 16; 539 i2 -= 4; 540 } 541 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 542 } 543 } else { 544 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 545 } 546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 547 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 554 { 555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 556 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 557 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 558 const PetscScalar *b; 559 PetscErrorCode ierr; 560 PetscInt m = a->mbs,i,i2,nz,idx; 561 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 562 563 PetscFunctionBegin; 564 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 565 its = its*lits; 566 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 567 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 568 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 569 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 570 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 571 572 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 573 574 diag = a->diag; 575 idiag = a->idiag; 576 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 578 579 if (flag & SOR_ZERO_INITIAL_GUESS) { 580 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 581 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 582 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 583 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 584 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 585 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 586 i2 = 5; 587 idiag += 25; 588 for (i=1; i<m; i++) { 589 v = aa + 25*ai[i]; 590 vi = aj + ai[i]; 591 nz = diag[i] - ai[i]; 592 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 593 while (nz--) { 594 idx = 5*(*vi++); 595 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 596 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 597 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 598 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 599 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 600 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 601 v += 25; 602 } 603 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 604 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 605 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 606 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 607 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 608 idiag += 25; 609 i2 += 5; 610 } 611 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 612 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 613 } 614 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 615 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 616 i2 = 0; 617 mdiag = a->idiag+25*a->mbs; 618 for (i=0; i<m; i++) { 619 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 620 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 621 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 622 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 623 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 624 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 625 mdiag += 25; 626 i2 += 5; 627 } 628 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 629 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 630 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 631 } 632 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 633 idiag = a->idiag+25*a->mbs - 25; 634 i2 = 5*m - 5; 635 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 636 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 637 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 638 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 639 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 640 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 641 idiag -= 25; 642 i2 -= 5; 643 for (i=m-2; i>=0; i--) { 644 v = aa + 25*(diag[i]+1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i+1] - diag[i] - 1; 647 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 651 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 652 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 653 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 654 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 655 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 656 v += 25; 657 } 658 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 659 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 660 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 661 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 662 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 663 idiag -= 25; 664 i2 -= 5; 665 } 666 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 670 } 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 679 { 680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 681 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 682 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 683 const PetscScalar *b; 684 PetscErrorCode ierr; 685 PetscInt m = a->mbs,i,i2,nz,idx; 686 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 687 688 PetscFunctionBegin; 689 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 690 its = its*lits; 691 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 692 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 693 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 694 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 695 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 696 697 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 698 699 diag = a->diag; 700 idiag = a->idiag; 701 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 703 704 if (flag & SOR_ZERO_INITIAL_GUESS) { 705 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 706 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 707 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 708 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 709 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 710 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 711 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 712 i2 = 6; 713 idiag += 36; 714 for (i=1; i<m; i++) { 715 v = aa + 36*ai[i]; 716 vi = aj + ai[i]; 717 nz = diag[i] - ai[i]; 718 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 719 while (nz--) { 720 idx = 6*(*vi++); 721 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 722 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 723 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 724 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 725 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 726 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 727 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 728 v += 36; 729 } 730 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 731 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 732 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 733 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 734 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 735 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 736 idiag += 36; 737 i2 += 6; 738 } 739 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 740 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 741 } 742 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 743 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 744 i2 = 0; 745 mdiag = a->idiag+36*a->mbs; 746 for (i=0; i<m; i++) { 747 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 748 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 749 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 750 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 751 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 752 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 753 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 754 mdiag += 36; 755 i2 += 6; 756 } 757 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 758 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 759 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 760 } 761 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 762 idiag = a->idiag+36*a->mbs - 36; 763 i2 = 6*m - 6; 764 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 765 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 766 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 767 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 768 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 769 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 770 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 771 idiag -= 36; 772 i2 -= 6; 773 for (i=m-2; i>=0; i--) { 774 v = aa + 36*(diag[i]+1); 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 778 while (nz--) { 779 idx = 6*(*vi++); 780 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 781 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 782 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 783 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 784 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 785 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 786 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 787 v += 36; 788 } 789 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 790 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 791 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 792 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 793 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 794 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 795 idiag -= 36; 796 i2 -= 6; 797 } 798 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 799 } 800 } else { 801 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 802 } 803 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 813 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 814 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 815 const PetscScalar *b; 816 PetscErrorCode ierr; 817 PetscInt m = a->mbs,i,i2,nz,idx; 818 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 819 820 PetscFunctionBegin; 821 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 822 its = its*lits; 823 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 824 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 825 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 826 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 827 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 828 829 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 830 831 diag = a->diag; 832 idiag = a->idiag; 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 835 836 if (flag & SOR_ZERO_INITIAL_GUESS) { 837 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 838 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 839 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 840 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 841 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 842 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 843 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 844 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 845 i2 = 7; 846 idiag += 49; 847 for (i=1; i<m; i++) { 848 v = aa + 49*ai[i]; 849 vi = aj + ai[i]; 850 nz = diag[i] - ai[i]; 851 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 852 while (nz--) { 853 idx = 7*(*vi++); 854 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 855 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 856 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 857 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 858 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 859 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 860 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 861 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 862 v += 49; 863 } 864 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 865 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 866 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 867 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 868 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 869 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 870 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 871 idiag += 49; 872 i2 += 7; 873 } 874 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 875 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 876 } 877 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 878 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 879 i2 = 0; 880 mdiag = a->idiag+49*a->mbs; 881 for (i=0; i<m; i++) { 882 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 883 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 884 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 885 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 886 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 887 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 888 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 889 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 890 mdiag += 49; 891 i2 += 7; 892 } 893 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 894 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 895 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 896 } 897 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 898 idiag = a->idiag+49*a->mbs - 49; 899 i2 = 7*m - 7; 900 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 901 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 902 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 903 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 904 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 905 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 906 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 907 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 908 idiag -= 49; 909 i2 -= 7; 910 for (i=m-2; i>=0; i--) { 911 v = aa + 49*(diag[i]+1); 912 vi = aj + diag[i] + 1; 913 nz = ai[i+1] - diag[i] - 1; 914 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 918 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 919 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 920 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 921 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 922 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 923 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 924 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 925 v += 49; 926 } 927 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 928 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 929 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 930 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 931 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 932 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 933 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 934 idiag -= 49; 935 i2 -= 7; 936 } 937 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 938 } 939 } else { 940 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 941 } 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 950 { 951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 952 PetscScalar *x,*work,*w,*workt; 953 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 954 const PetscScalar *b; 955 PetscErrorCode ierr; 956 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 957 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 958 959 PetscFunctionBegin; 960 its = its*lits; 961 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 962 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 963 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 964 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 965 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 966 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 967 968 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 969 970 diag = a->diag; 971 idiag = a->idiag; 972 if (!a->mult_work) { 973 k = PetscMax(A->rmap->n,A->cmap->n); 974 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 975 } 976 work = a->mult_work; 977 if (!a->sor_work) { 978 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 979 } 980 w = a->sor_work; 981 982 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 984 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 987 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 988 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 989 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 990 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 991 i2 = bs; 992 idiag += bs2; 993 for (i=1; i<m; i++) { 994 v = aa + bs2*ai[i]; 995 vi = aj + ai[i]; 996 nz = diag[i] - ai[i]; 997 998 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 999 /* copy all rows of x that are needed into contiguous space */ 1000 workt = work; 1001 for (j=0; j<nz; j++) { 1002 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1003 workt += bs; 1004 } 1005 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1006 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1007 while (nz--) { 1008 idx = N*(*vi++); 1009 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1010 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1011 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1012 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1013 v += N2; 1014 } */ 1015 1016 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1017 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1018 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1019 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1020 1021 idiag += bs2; 1022 i2 += bs; 1023 } 1024 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1025 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1026 } 1027 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1028 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1029 i2 = 0; 1030 mdiag = a->idiag+bs2*a->mbs; 1031 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 for (i=0; i<m; i++) { 1033 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1034 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1035 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1036 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1037 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1038 1039 mdiag += bs2; 1040 i2 += bs; 1041 } 1042 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1043 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1044 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1047 idiag = a->idiag+bs2*a->mbs - bs2; 1048 i2 = bs*m - bs; 1049 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1050 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1051 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1052 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1053 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1054 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1055 idiag -= bs2; 1056 i2 -= bs; 1057 for (i=m-2; i>=0; i--) { 1058 v = aa + bs2*(diag[i]+1); 1059 vi = aj + diag[i] + 1; 1060 nz = ai[i+1] - diag[i] - 1; 1061 1062 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 /* copy all rows of x that are needed into contiguous space */ 1064 workt = work; 1065 for (j=0; j<nz; j++) { 1066 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1067 workt += bs; 1068 } 1069 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1070 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1071 while (nz--) { 1072 idx = N*(*vi++); 1073 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1074 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1075 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1076 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1077 v += N2; 1078 } */ 1079 1080 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1081 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1082 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1083 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1084 idiag -= bs2; 1085 i2 -= bs; 1086 } 1087 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1088 } 1089 } else { 1090 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1091 } 1092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 /* 1098 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1099 */ 1100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1103 #define matsetvaluesblocked4_ matsetvaluesblocked4 1104 #endif 1105 1106 EXTERN_C_BEGIN 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "matsetvaluesblocked4_" 1109 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1110 { 1111 Mat A = *AA; 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1114 PetscInt *ai=a->i,*ailen=a->ilen; 1115 PetscInt *aj=a->j,stepval,lastcol = -1; 1116 const PetscScalar *value = v; 1117 MatScalar *ap,*aa = a->a,*bap; 1118 1119 PetscFunctionBegin; 1120 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1121 stepval = (n-1)*4; 1122 for (k=0; k<m; k++) { /* loop over added rows */ 1123 row = im[k]; 1124 rp = aj + ai[row]; 1125 ap = aa + 16*ai[row]; 1126 nrow = ailen[row]; 1127 low = 0; 1128 high = nrow; 1129 for (l=0; l<n; l++) { /* loop over added columns */ 1130 col = in[l]; 1131 if (col <= lastcol) low = 0; 1132 else high = nrow; 1133 lastcol = col; 1134 value = v + k*(stepval+4 + l)*4; 1135 while (high-low > 7) { 1136 t = (low+high)/2; 1137 if (rp[t] > col) high = t; 1138 else low = t; 1139 } 1140 for (i=low; i<high; i++) { 1141 if (rp[i] > col) break; 1142 if (rp[i] == col) { 1143 bap = ap + 16*i; 1144 for (ii=0; ii<4; ii++,value+=stepval) { 1145 for (jj=ii; jj<16; jj+=4) { 1146 bap[jj] += *value++; 1147 } 1148 } 1149 goto noinsert2; 1150 } 1151 } 1152 N = nrow++ - 1; 1153 high++; /* added new column index thus must search to one higher than before */ 1154 /* shift up all the later entries in this row */ 1155 for (ii=N; ii>=i; ii--) { 1156 rp[ii+1] = rp[ii]; 1157 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1158 } 1159 if (N >= i) { 1160 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1161 } 1162 rp[i] = col; 1163 bap = ap + 16*i; 1164 for (ii=0; ii<4; ii++,value+=stepval) { 1165 for (jj=ii; jj<16; jj+=4) { 1166 bap[jj] = *value++; 1167 } 1168 } 1169 noinsert2:; 1170 low = i; 1171 } 1172 ailen[row] = nrow; 1173 } 1174 PetscFunctionReturnVoid(); 1175 } 1176 EXTERN_C_END 1177 1178 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1179 #define matsetvalues4_ MATSETVALUES4 1180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1181 #define matsetvalues4_ matsetvalues4 1182 #endif 1183 1184 EXTERN_C_BEGIN 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatSetValues4_" 1187 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1188 { 1189 Mat A = *AA; 1190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1191 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1192 PetscInt *ai=a->i,*ailen=a->ilen; 1193 PetscInt *aj=a->j,brow,bcol; 1194 PetscInt ridx,cidx,lastcol = -1; 1195 MatScalar *ap,value,*aa=a->a,*bap; 1196 1197 PetscFunctionBegin; 1198 for (k=0; k<m; k++) { /* loop over added rows */ 1199 row = im[k]; brow = row/4; 1200 rp = aj + ai[brow]; 1201 ap = aa + 16*ai[brow]; 1202 nrow = ailen[brow]; 1203 low = 0; 1204 high = nrow; 1205 for (l=0; l<n; l++) { /* loop over added columns */ 1206 col = in[l]; bcol = col/4; 1207 ridx = row % 4; cidx = col % 4; 1208 value = v[l + k*n]; 1209 if (col <= lastcol) low = 0; 1210 else high = nrow; 1211 lastcol = col; 1212 while (high-low > 7) { 1213 t = (low+high)/2; 1214 if (rp[t] > bcol) high = t; 1215 else low = t; 1216 } 1217 for (i=low; i<high; i++) { 1218 if (rp[i] > bcol) break; 1219 if (rp[i] == bcol) { 1220 bap = ap + 16*i + 4*cidx + ridx; 1221 *bap += value; 1222 goto noinsert1; 1223 } 1224 } 1225 N = nrow++ - 1; 1226 high++; /* added new column thus must search to one higher than before */ 1227 /* shift up all the later entries in this row */ 1228 for (ii=N; ii>=i; ii--) { 1229 rp[ii+1] = rp[ii]; 1230 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1231 } 1232 if (N>=i) { 1233 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1234 } 1235 rp[i] = bcol; 1236 ap[16*i + 4*cidx + ridx] = value; 1237 noinsert1:; 1238 low = i; 1239 } 1240 ailen[brow] = nrow; 1241 } 1242 PetscFunctionReturnVoid(); 1243 } 1244 EXTERN_C_END 1245 1246 /* 1247 Checks for missing diagonals 1248 */ 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1251 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1252 { 1253 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1254 PetscErrorCode ierr; 1255 PetscInt *diag,*jj = a->j,i; 1256 1257 PetscFunctionBegin; 1258 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1259 *missing = PETSC_FALSE; 1260 if (A->rmap->n > 0 && !jj) { 1261 *missing = PETSC_TRUE; 1262 if (d) *d = 0; 1263 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1264 } else { 1265 diag = a->diag; 1266 for (i=0; i<a->mbs; i++) { 1267 if (jj[diag[i]] != i) { 1268 *missing = PETSC_TRUE; 1269 if (d) *d = i; 1270 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1271 break; 1272 } 1273 } 1274 } 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1280 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1281 { 1282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1283 PetscErrorCode ierr; 1284 PetscInt i,j,m = a->mbs; 1285 1286 PetscFunctionBegin; 1287 if (!a->diag) { 1288 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1289 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1290 a->free_diag = PETSC_TRUE; 1291 } 1292 for (i=0; i<m; i++) { 1293 a->diag[i] = a->i[i+1]; 1294 for (j=a->i[i]; j<a->i[i+1]; j++) { 1295 if (a->j[j] == i) { 1296 a->diag[i] = j; 1297 break; 1298 } 1299 } 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1312 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1313 1314 PetscFunctionBegin; 1315 *nn = n; 1316 if (!ia) PetscFunctionReturn(0); 1317 if (symmetric) { 1318 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1319 nz = tia[n]; 1320 } else { 1321 tia = a->i; tja = a->j; 1322 } 1323 1324 if (!blockcompressed && bs > 1) { 1325 (*nn) *= bs; 1326 /* malloc & create the natural set of indices */ 1327 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1328 if (n) { 1329 (*ia)[0] = 0; 1330 for (j=1; j<bs; j++) { 1331 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1332 } 1333 } 1334 1335 for (i=1; i<n; i++) { 1336 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1337 for (j=1; j<bs; j++) { 1338 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1339 } 1340 } 1341 if (n) { 1342 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1343 } 1344 1345 if (inja) { 1346 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1347 cnt = 0; 1348 for (i=0; i<n; i++) { 1349 for (j=0; j<bs; j++) { 1350 for (k=tia[i]; k<tia[i+1]; k++) { 1351 for (l=0; l<bs; l++) { 1352 (*ja)[cnt++] = bs*tja[k] + l; 1353 } 1354 } 1355 } 1356 } 1357 } 1358 1359 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1360 ierr = PetscFree(tia);CHKERRQ(ierr); 1361 ierr = PetscFree(tja);CHKERRQ(ierr); 1362 } 1363 } else if (oshift == 1) { 1364 if (symmetric) { 1365 nz = tia[A->rmap->n/bs]; 1366 /* add 1 to i and j indices */ 1367 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1368 *ia = tia; 1369 if (ja) { 1370 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1371 *ja = tja; 1372 } 1373 } else { 1374 nz = a->i[A->rmap->n/bs]; 1375 /* malloc space and add 1 to i and j indices */ 1376 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1377 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1378 if (ja) { 1379 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1380 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1381 } 1382 } 1383 } else { 1384 *ia = tia; 1385 if (ja) *ja = tja; 1386 } 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1392 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1393 { 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 if (!ia) PetscFunctionReturn(0); 1398 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1399 ierr = PetscFree(*ia);CHKERRQ(ierr); 1400 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1407 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1408 { 1409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 #if defined(PETSC_USE_LOG) 1414 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1415 #endif 1416 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1417 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1418 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1419 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1420 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1421 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1422 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1423 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1424 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1425 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1426 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1427 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1428 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1429 1430 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1431 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1432 ierr = PetscFree(A->data);CHKERRQ(ierr); 1433 1434 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1435 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1436 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1437 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1443 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1444 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1450 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1451 { 1452 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1453 PetscErrorCode ierr; 1454 1455 PetscFunctionBegin; 1456 switch (op) { 1457 case MAT_ROW_ORIENTED: 1458 a->roworiented = flg; 1459 break; 1460 case MAT_KEEP_NONZERO_PATTERN: 1461 a->keepnonzeropattern = flg; 1462 break; 1463 case MAT_NEW_NONZERO_LOCATIONS: 1464 a->nonew = (flg ? 0 : 1); 1465 break; 1466 case MAT_NEW_NONZERO_LOCATION_ERR: 1467 a->nonew = (flg ? -1 : 0); 1468 break; 1469 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1470 a->nonew = (flg ? -2 : 0); 1471 break; 1472 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1473 a->nounused = (flg ? -1 : 0); 1474 break; 1475 case MAT_CHECK_COMPRESSED_ROW: 1476 a->compressedrow.check = flg; 1477 break; 1478 case MAT_NEW_DIAGONALS: 1479 case MAT_IGNORE_OFF_PROC_ENTRIES: 1480 case MAT_USE_HASH_TABLE: 1481 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1482 break; 1483 case MAT_SPD: 1484 case MAT_SYMMETRIC: 1485 case MAT_STRUCTURALLY_SYMMETRIC: 1486 case MAT_HERMITIAN: 1487 case MAT_SYMMETRY_ETERNAL: 1488 /* These options are handled directly by MatSetOption() */ 1489 break; 1490 default: 1491 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1498 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1499 { 1500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1501 PetscErrorCode ierr; 1502 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1503 MatScalar *aa,*aa_i; 1504 PetscScalar *v_i; 1505 1506 PetscFunctionBegin; 1507 bs = A->rmap->bs; 1508 ai = a->i; 1509 aj = a->j; 1510 aa = a->a; 1511 bs2 = a->bs2; 1512 1513 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1514 1515 bn = row/bs; /* Block number */ 1516 bp = row % bs; /* Block Position */ 1517 M = ai[bn+1] - ai[bn]; 1518 *nz = bs*M; 1519 1520 if (v) { 1521 *v = 0; 1522 if (*nz) { 1523 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1524 for (i=0; i<M; i++) { /* for each block in the block row */ 1525 v_i = *v + i*bs; 1526 aa_i = aa + bs2*(ai[bn] + i); 1527 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1528 } 1529 } 1530 } 1531 1532 if (idx) { 1533 *idx = 0; 1534 if (*nz) { 1535 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1536 for (i=0; i<M; i++) { /* for each block in the block row */ 1537 idx_i = *idx + i*bs; 1538 itmp = bs*aj[ai[bn] + i]; 1539 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1540 } 1541 } 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1548 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1549 { 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1554 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1555 PetscFunctionReturn(0); 1556 } 1557 1558 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1562 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1563 { 1564 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1565 Mat C; 1566 PetscErrorCode ierr; 1567 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1568 PetscInt *rows,*cols,bs2=a->bs2; 1569 MatScalar *array; 1570 1571 PetscFunctionBegin; 1572 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1573 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1574 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1575 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1576 1577 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1578 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1579 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1580 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1581 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1582 ierr = PetscFree(col);CHKERRQ(ierr); 1583 } else { 1584 C = *B; 1585 } 1586 1587 array = a->a; 1588 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1589 for (i=0; i<mbs; i++) { 1590 cols[0] = i*bs; 1591 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1592 len = ai[i+1] - ai[i]; 1593 for (j=0; j<len; j++) { 1594 rows[0] = (*aj++)*bs; 1595 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1596 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1597 array += bs2; 1598 } 1599 } 1600 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1601 1602 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1603 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1604 1605 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1606 *B = C; 1607 } else { 1608 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1609 } 1610 PetscFunctionReturn(0); 1611 } 1612 1613 EXTERN_C_BEGIN 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1616 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1617 { 1618 PetscErrorCode ierr; 1619 Mat Btrans; 1620 1621 PetscFunctionBegin; 1622 *f = PETSC_FALSE; 1623 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1624 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1625 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 EXTERN_C_END 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1632 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1633 { 1634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1635 PetscErrorCode ierr; 1636 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1637 int fd; 1638 PetscScalar *aa; 1639 FILE *file; 1640 1641 PetscFunctionBegin; 1642 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1643 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1644 col_lens[0] = MAT_FILE_CLASSID; 1645 1646 col_lens[1] = A->rmap->N; 1647 col_lens[2] = A->cmap->n; 1648 col_lens[3] = a->nz*bs2; 1649 1650 /* store lengths of each row and write (including header) to file */ 1651 count = 0; 1652 for (i=0; i<a->mbs; i++) { 1653 for (j=0; j<bs; j++) { 1654 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1655 } 1656 } 1657 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1658 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1659 1660 /* store column indices (zero start index) */ 1661 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1662 count = 0; 1663 for (i=0; i<a->mbs; i++) { 1664 for (j=0; j<bs; j++) { 1665 for (k=a->i[i]; k<a->i[i+1]; k++) { 1666 for (l=0; l<bs; l++) { 1667 jj[count++] = bs*a->j[k] + l; 1668 } 1669 } 1670 } 1671 } 1672 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1673 ierr = PetscFree(jj);CHKERRQ(ierr); 1674 1675 /* store nonzero values */ 1676 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1677 count = 0; 1678 for (i=0; i<a->mbs; i++) { 1679 for (j=0; j<bs; j++) { 1680 for (k=a->i[i]; k<a->i[i+1]; k++) { 1681 for (l=0; l<bs; l++) { 1682 aa[count++] = a->a[bs2*k + l*bs + j]; 1683 } 1684 } 1685 } 1686 } 1687 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1688 ierr = PetscFree(aa);CHKERRQ(ierr); 1689 1690 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1691 if (file) { 1692 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1699 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1700 { 1701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1702 PetscErrorCode ierr; 1703 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1704 PetscViewerFormat format; 1705 1706 PetscFunctionBegin; 1707 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1708 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1709 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1710 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1711 Mat aij; 1712 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1713 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1714 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1715 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1716 PetscFunctionReturn(0); 1717 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1718 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1719 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1720 for (i=0; i<a->mbs; i++) { 1721 for (j=0; j<bs; j++) { 1722 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1723 for (k=a->i[i]; k<a->i[i+1]; k++) { 1724 for (l=0; l<bs; l++) { 1725 #if defined(PETSC_USE_COMPLEX) 1726 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1727 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1728 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1729 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1730 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1731 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1732 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1733 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1734 } 1735 #else 1736 if (a->a[bs2*k + l*bs + j] != 0.0) { 1737 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1738 } 1739 #endif 1740 } 1741 } 1742 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1743 } 1744 } 1745 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1746 } else { 1747 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1748 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1749 for (i=0; i<a->mbs; i++) { 1750 for (j=0; j<bs; j++) { 1751 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1752 for (k=a->i[i]; k<a->i[i+1]; k++) { 1753 for (l=0; l<bs; l++) { 1754 #if defined(PETSC_USE_COMPLEX) 1755 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1756 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1757 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1758 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1760 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1761 } else { 1762 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1763 } 1764 #else 1765 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1766 #endif 1767 } 1768 } 1769 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1770 } 1771 } 1772 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1773 } 1774 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1780 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1781 { 1782 Mat A = (Mat) Aa; 1783 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1784 PetscErrorCode ierr; 1785 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1786 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1787 MatScalar *aa; 1788 PetscViewer viewer; 1789 PetscViewerFormat format; 1790 1791 PetscFunctionBegin; 1792 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1793 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1794 1795 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1796 1797 /* loop over matrix elements drawing boxes */ 1798 1799 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1800 color = PETSC_DRAW_BLUE; 1801 for (i=0,row=0; i<mbs; i++,row+=bs) { 1802 for (j=a->i[i]; j<a->i[i+1]; j++) { 1803 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1804 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1805 aa = a->a + j*bs2; 1806 for (k=0; k<bs; k++) { 1807 for (l=0; l<bs; l++) { 1808 if (PetscRealPart(*aa++) >= 0.) continue; 1809 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1810 } 1811 } 1812 } 1813 } 1814 color = PETSC_DRAW_CYAN; 1815 for (i=0,row=0; i<mbs; i++,row+=bs) { 1816 for (j=a->i[i]; j<a->i[i+1]; j++) { 1817 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1818 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1819 aa = a->a + j*bs2; 1820 for (k=0; k<bs; k++) { 1821 for (l=0; l<bs; l++) { 1822 if (PetscRealPart(*aa++) != 0.) continue; 1823 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1824 } 1825 } 1826 } 1827 } 1828 color = PETSC_DRAW_RED; 1829 for (i=0,row=0; i<mbs; i++,row+=bs) { 1830 for (j=a->i[i]; j<a->i[i+1]; j++) { 1831 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1832 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1833 aa = a->a + j*bs2; 1834 for (k=0; k<bs; k++) { 1835 for (l=0; l<bs; l++) { 1836 if (PetscRealPart(*aa++) <= 0.) continue; 1837 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1838 } 1839 } 1840 } 1841 } 1842 } else { 1843 /* use contour shading to indicate magnitude of values */ 1844 /* first determine max of all nonzero values */ 1845 PetscDraw popup; 1846 PetscReal scale,maxv = 0.0; 1847 1848 for (i=0; i<a->nz*a->bs2; i++) { 1849 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1850 } 1851 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1852 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1853 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1854 for (i=0,row=0; i<mbs; i++,row+=bs) { 1855 for (j=a->i[i]; j<a->i[i+1]; j++) { 1856 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1857 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1858 aa = a->a + j*bs2; 1859 for (k=0; k<bs; k++) { 1860 for (l=0; l<bs; l++) { 1861 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1862 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1863 } 1864 } 1865 } 1866 } 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1873 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1874 { 1875 PetscErrorCode ierr; 1876 PetscReal xl,yl,xr,yr,w,h; 1877 PetscDraw draw; 1878 PetscBool isnull; 1879 1880 PetscFunctionBegin; 1881 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1882 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1883 1884 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1885 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1886 xr += w; yr += h; xl = -w; yl = -h; 1887 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1888 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1889 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatView_SeqBAIJ" 1895 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1896 { 1897 PetscErrorCode ierr; 1898 PetscBool iascii,isbinary,isdraw; 1899 1900 PetscFunctionBegin; 1901 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1902 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1903 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1904 if (iascii) { 1905 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1906 } else if (isbinary) { 1907 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1908 } else if (isdraw) { 1909 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1910 } else { 1911 Mat B; 1912 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1913 ierr = MatView(B,viewer);CHKERRQ(ierr); 1914 ierr = MatDestroy(&B);CHKERRQ(ierr); 1915 } 1916 PetscFunctionReturn(0); 1917 } 1918 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1922 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1923 { 1924 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1925 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1926 PetscInt *ai = a->i,*ailen = a->ilen; 1927 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1928 MatScalar *ap,*aa = a->a; 1929 1930 PetscFunctionBegin; 1931 for (k=0; k<m; k++) { /* loop over rows */ 1932 row = im[k]; brow = row/bs; 1933 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1934 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1935 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1936 nrow = ailen[brow]; 1937 for (l=0; l<n; l++) { /* loop over columns */ 1938 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1939 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1940 col = in[l] ; 1941 bcol = col/bs; 1942 cidx = col%bs; 1943 ridx = row%bs; 1944 high = nrow; 1945 low = 0; /* assume unsorted */ 1946 while (high-low > 5) { 1947 t = (low+high)/2; 1948 if (rp[t] > bcol) high = t; 1949 else low = t; 1950 } 1951 for (i=low; i<high; i++) { 1952 if (rp[i] > bcol) break; 1953 if (rp[i] == bcol) { 1954 *v++ = ap[bs2*i+bs*cidx+ridx]; 1955 goto finished; 1956 } 1957 } 1958 *v++ = 0.0; 1959 finished:; 1960 } 1961 } 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1967 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1968 { 1969 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1970 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1971 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1972 PetscErrorCode ierr; 1973 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1974 PetscBool roworiented=a->roworiented; 1975 const PetscScalar *value = v; 1976 MatScalar *ap,*aa = a->a,*bap; 1977 1978 PetscFunctionBegin; 1979 if (roworiented) { 1980 stepval = (n-1)*bs; 1981 } else { 1982 stepval = (m-1)*bs; 1983 } 1984 for (k=0; k<m; k++) { /* loop over added rows */ 1985 row = im[k]; 1986 if (row < 0) continue; 1987 #if defined(PETSC_USE_DEBUG) 1988 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1989 #endif 1990 rp = aj + ai[row]; 1991 ap = aa + bs2*ai[row]; 1992 rmax = imax[row]; 1993 nrow = ailen[row]; 1994 low = 0; 1995 high = nrow; 1996 for (l=0; l<n; l++) { /* loop over added columns */ 1997 if (in[l] < 0) continue; 1998 #if defined(PETSC_USE_DEBUG) 1999 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 2000 #endif 2001 col = in[l]; 2002 if (roworiented) { 2003 value = v + (k*(stepval+bs) + l)*bs; 2004 } else { 2005 value = v + (l*(stepval+bs) + k)*bs; 2006 } 2007 if (col <= lastcol) low = 0; else high = nrow; 2008 lastcol = col; 2009 while (high-low > 7) { 2010 t = (low+high)/2; 2011 if (rp[t] > col) high = t; 2012 else low = t; 2013 } 2014 for (i=low; i<high; i++) { 2015 if (rp[i] > col) break; 2016 if (rp[i] == col) { 2017 bap = ap + bs2*i; 2018 if (roworiented) { 2019 if (is == ADD_VALUES) { 2020 for (ii=0; ii<bs; ii++,value+=stepval) { 2021 for (jj=ii; jj<bs2; jj+=bs) { 2022 bap[jj] += *value++; 2023 } 2024 } 2025 } else { 2026 for (ii=0; ii<bs; ii++,value+=stepval) { 2027 for (jj=ii; jj<bs2; jj+=bs) { 2028 bap[jj] = *value++; 2029 } 2030 } 2031 } 2032 } else { 2033 if (is == ADD_VALUES) { 2034 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2035 for (jj=0; jj<bs; jj++) { 2036 bap[jj] += value[jj]; 2037 } 2038 bap += bs; 2039 } 2040 } else { 2041 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2042 for (jj=0; jj<bs; jj++) { 2043 bap[jj] = value[jj]; 2044 } 2045 bap += bs; 2046 } 2047 } 2048 } 2049 goto noinsert2; 2050 } 2051 } 2052 if (nonew == 1) goto noinsert2; 2053 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2054 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2055 N = nrow++ - 1; high++; 2056 /* shift up all the later entries in this row */ 2057 for (ii=N; ii>=i; ii--) { 2058 rp[ii+1] = rp[ii]; 2059 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2060 } 2061 if (N >= i) { 2062 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2063 } 2064 rp[i] = col; 2065 bap = ap + bs2*i; 2066 if (roworiented) { 2067 for (ii=0; ii<bs; ii++,value+=stepval) { 2068 for (jj=ii; jj<bs2; jj+=bs) { 2069 bap[jj] = *value++; 2070 } 2071 } 2072 } else { 2073 for (ii=0; ii<bs; ii++,value+=stepval) { 2074 for (jj=0; jj<bs; jj++) { 2075 *bap++ = *value++; 2076 } 2077 } 2078 } 2079 noinsert2:; 2080 low = i; 2081 } 2082 ailen[row] = nrow; 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2089 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2090 { 2091 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2092 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2093 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2094 PetscErrorCode ierr; 2095 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2096 MatScalar *aa = a->a,*ap; 2097 PetscReal ratio=0.6; 2098 2099 PetscFunctionBegin; 2100 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2101 2102 if (m) rmax = ailen[0]; 2103 for (i=1; i<mbs; i++) { 2104 /* move each row back by the amount of empty slots (fshift) before it*/ 2105 fshift += imax[i-1] - ailen[i-1]; 2106 rmax = PetscMax(rmax,ailen[i]); 2107 if (fshift) { 2108 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2109 N = ailen[i]; 2110 for (j=0; j<N; j++) { 2111 ip[j-fshift] = ip[j]; 2112 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2113 } 2114 } 2115 ai[i] = ai[i-1] + ailen[i-1]; 2116 } 2117 if (mbs) { 2118 fshift += imax[mbs-1] - ailen[mbs-1]; 2119 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2120 } 2121 /* reset ilen and imax for each row */ 2122 for (i=0; i<mbs; i++) { 2123 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2124 } 2125 a->nz = ai[mbs]; 2126 2127 /* diagonals may have moved, so kill the diagonal pointers */ 2128 a->idiagvalid = PETSC_FALSE; 2129 if (fshift && a->diag) { 2130 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2131 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2132 a->diag = 0; 2133 } 2134 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2135 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2136 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2137 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2138 A->info.mallocs += a->reallocs; 2139 a->reallocs = 0; 2140 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2141 2142 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2143 A->same_nonzero = PETSC_TRUE; 2144 PetscFunctionReturn(0); 2145 } 2146 2147 /* 2148 This function returns an array of flags which indicate the locations of contiguous 2149 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2150 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2151 Assume: sizes should be long enough to hold all the values. 2152 */ 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2155 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2156 { 2157 PetscInt i,j,k,row; 2158 PetscBool flg; 2159 2160 PetscFunctionBegin; 2161 for (i=0,j=0; i<n; j++) { 2162 row = idx[i]; 2163 if (row%bs!=0) { /* Not the begining of a block */ 2164 sizes[j] = 1; 2165 i++; 2166 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2167 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2168 i++; 2169 } else { /* Begining of the block, so check if the complete block exists */ 2170 flg = PETSC_TRUE; 2171 for (k=1; k<bs; k++) { 2172 if (row+k != idx[i+k]) { /* break in the block */ 2173 flg = PETSC_FALSE; 2174 break; 2175 } 2176 } 2177 if (flg) { /* No break in the bs */ 2178 sizes[j] = bs; 2179 i+= bs; 2180 } else { 2181 sizes[j] = 1; 2182 i++; 2183 } 2184 } 2185 } 2186 *bs_max = j; 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2192 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2193 { 2194 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2195 PetscErrorCode ierr; 2196 PetscInt i,j,k,count,*rows; 2197 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2198 PetscScalar zero = 0.0; 2199 MatScalar *aa; 2200 const PetscScalar *xx; 2201 PetscScalar *bb; 2202 2203 PetscFunctionBegin; 2204 /* fix right hand side if needed */ 2205 if (x && b) { 2206 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2207 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2208 for (i=0; i<is_n; i++) { 2209 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2210 } 2211 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2212 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2213 } 2214 2215 /* Make a copy of the IS and sort it */ 2216 /* allocate memory for rows,sizes */ 2217 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2218 2219 /* copy IS values to rows, and sort them */ 2220 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2221 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2222 2223 if (baij->keepnonzeropattern) { 2224 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2225 bs_max = is_n; 2226 A->same_nonzero = PETSC_TRUE; 2227 } else { 2228 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2229 A->same_nonzero = PETSC_FALSE; 2230 } 2231 2232 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2233 row = rows[j]; 2234 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2235 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2236 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2237 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2238 if (diag != (PetscScalar)0.0) { 2239 if (baij->ilen[row/bs] > 0) { 2240 baij->ilen[row/bs] = 1; 2241 baij->j[baij->i[row/bs]] = row/bs; 2242 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2243 } 2244 /* Now insert all the diagonal values for this bs */ 2245 for (k=0; k<bs; k++) { 2246 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2247 } 2248 } else { /* (diag == 0.0) */ 2249 baij->ilen[row/bs] = 0; 2250 } /* end (diag == 0.0) */ 2251 } else { /* (sizes[i] != bs) */ 2252 #if defined (PETSC_USE_DEBUG) 2253 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2254 #endif 2255 for (k=0; k<count; k++) { 2256 aa[0] = zero; 2257 aa += bs; 2258 } 2259 if (diag != (PetscScalar)0.0) { 2260 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2261 } 2262 } 2263 } 2264 2265 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2266 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2272 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2273 { 2274 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2275 PetscErrorCode ierr; 2276 PetscInt i,j,k,count; 2277 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2278 PetscScalar zero = 0.0; 2279 MatScalar *aa; 2280 const PetscScalar *xx; 2281 PetscScalar *bb; 2282 PetscBool *zeroed,vecs = PETSC_FALSE; 2283 2284 PetscFunctionBegin; 2285 /* fix right hand side if needed */ 2286 if (x && b) { 2287 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2288 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2289 vecs = PETSC_TRUE; 2290 } 2291 A->same_nonzero = PETSC_TRUE; 2292 2293 /* zero the columns */ 2294 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2295 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2296 for (i=0; i<is_n; i++) { 2297 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2298 zeroed[is_idx[i]] = PETSC_TRUE; 2299 } 2300 for (i=0; i<A->rmap->N; i++) { 2301 if (!zeroed[i]) { 2302 row = i/bs; 2303 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2304 for (k=0; k<bs; k++) { 2305 col = bs*baij->j[j] + k; 2306 if (zeroed[col]) { 2307 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2308 if (vecs) bb[i] -= aa[0]*xx[col]; 2309 aa[0] = 0.0; 2310 } 2311 } 2312 } 2313 } else if (vecs) bb[i] = diag*xx[i]; 2314 } 2315 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2316 if (vecs) { 2317 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2318 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2319 } 2320 2321 /* zero the rows */ 2322 for (i=0; i<is_n; i++) { 2323 row = is_idx[i]; 2324 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2325 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2326 for (k=0; k<count; k++) { 2327 aa[0] = zero; 2328 aa += bs; 2329 } 2330 if (diag != (PetscScalar)0.0) { 2331 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2332 } 2333 } 2334 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2340 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2341 { 2342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2343 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2344 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2345 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2346 PetscErrorCode ierr; 2347 PetscInt ridx,cidx,bs2=a->bs2; 2348 PetscBool roworiented=a->roworiented; 2349 MatScalar *ap,value,*aa=a->a,*bap; 2350 2351 PetscFunctionBegin; 2352 if (v) PetscValidScalarPointer(v,6); 2353 for (k=0; k<m; k++) { /* loop over added rows */ 2354 row = im[k]; 2355 brow = row/bs; 2356 if (row < 0) continue; 2357 #if defined(PETSC_USE_DEBUG) 2358 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2359 #endif 2360 rp = aj + ai[brow]; 2361 ap = aa + bs2*ai[brow]; 2362 rmax = imax[brow]; 2363 nrow = ailen[brow]; 2364 low = 0; 2365 high = nrow; 2366 for (l=0; l<n; l++) { /* loop over added columns */ 2367 if (in[l] < 0) continue; 2368 #if defined(PETSC_USE_DEBUG) 2369 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2370 #endif 2371 col = in[l]; bcol = col/bs; 2372 ridx = row % bs; cidx = col % bs; 2373 if (roworiented) { 2374 value = v[l + k*n]; 2375 } else { 2376 value = v[k + l*m]; 2377 } 2378 if (col <= lastcol) low = 0; else high = nrow; 2379 lastcol = col; 2380 while (high-low > 7) { 2381 t = (low+high)/2; 2382 if (rp[t] > bcol) high = t; 2383 else low = t; 2384 } 2385 for (i=low; i<high; i++) { 2386 if (rp[i] > bcol) break; 2387 if (rp[i] == bcol) { 2388 bap = ap + bs2*i + bs*cidx + ridx; 2389 if (is == ADD_VALUES) *bap += value; 2390 else *bap = value; 2391 goto noinsert1; 2392 } 2393 } 2394 if (nonew == 1) goto noinsert1; 2395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2396 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2397 N = nrow++ - 1; high++; 2398 /* shift up all the later entries in this row */ 2399 for (ii=N; ii>=i; ii--) { 2400 rp[ii+1] = rp[ii]; 2401 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2402 } 2403 if (N>=i) { 2404 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2405 } 2406 rp[i] = bcol; 2407 ap[bs2*i + bs*cidx + ridx] = value; 2408 a->nz++; 2409 noinsert1:; 2410 low = i; 2411 } 2412 ailen[brow] = nrow; 2413 } 2414 A->same_nonzero = PETSC_FALSE; 2415 PetscFunctionReturn(0); 2416 } 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2420 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2421 { 2422 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2423 Mat outA; 2424 PetscErrorCode ierr; 2425 PetscBool row_identity,col_identity; 2426 2427 PetscFunctionBegin; 2428 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2429 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2430 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2431 if (!row_identity || !col_identity) { 2432 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2433 } 2434 2435 outA = inA; 2436 inA->factortype = MAT_FACTOR_LU; 2437 2438 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2439 2440 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2441 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2442 a->row = row; 2443 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2444 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2445 a->col = col; 2446 2447 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2448 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2449 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2450 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2451 2452 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2453 if (!a->solve_work) { 2454 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2455 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2456 } 2457 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2458 2459 PetscFunctionReturn(0); 2460 } 2461 2462 EXTERN_C_BEGIN 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2465 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2466 { 2467 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2468 PetscInt i,nz,mbs; 2469 2470 PetscFunctionBegin; 2471 nz = baij->maxnz; 2472 mbs = baij->mbs; 2473 for (i=0; i<nz; i++) { 2474 baij->j[i] = indices[i]; 2475 } 2476 baij->nz = nz; 2477 for (i=0; i<mbs; i++) { 2478 baij->ilen[i] = baij->imax[i]; 2479 } 2480 PetscFunctionReturn(0); 2481 } 2482 EXTERN_C_END 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2486 /*@ 2487 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2488 in the matrix. 2489 2490 Input Parameters: 2491 + mat - the SeqBAIJ matrix 2492 - indices - the column indices 2493 2494 Level: advanced 2495 2496 Notes: 2497 This can be called if you have precomputed the nonzero structure of the 2498 matrix and want to provide it to the matrix object to improve the performance 2499 of the MatSetValues() operation. 2500 2501 You MUST have set the correct numbers of nonzeros per row in the call to 2502 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2503 2504 MUST be called before any calls to MatSetValues(); 2505 2506 @*/ 2507 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2508 { 2509 PetscErrorCode ierr; 2510 2511 PetscFunctionBegin; 2512 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2513 PetscValidPointer(indices,2); 2514 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2520 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2521 { 2522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2523 PetscErrorCode ierr; 2524 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2525 PetscReal atmp; 2526 PetscScalar *x,zero = 0.0; 2527 MatScalar *aa; 2528 PetscInt ncols,brow,krow,kcol; 2529 2530 PetscFunctionBegin; 2531 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2532 bs = A->rmap->bs; 2533 aa = a->a; 2534 ai = a->i; 2535 aj = a->j; 2536 mbs = a->mbs; 2537 2538 ierr = VecSet(v,zero);CHKERRQ(ierr); 2539 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2540 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2541 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2542 for (i=0; i<mbs; i++) { 2543 ncols = ai[1] - ai[0]; ai++; 2544 brow = bs*i; 2545 for (j=0; j<ncols; j++) { 2546 for (kcol=0; kcol<bs; kcol++) { 2547 for (krow=0; krow<bs; krow++) { 2548 atmp = PetscAbsScalar(*aa);aa++; 2549 row = brow + krow; /* row index */ 2550 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2551 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2552 } 2553 } 2554 aj++; 2555 } 2556 } 2557 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatCopy_SeqBAIJ" 2563 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2564 { 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 /* If the two matrices have the same copy implementation, use fast copy. */ 2569 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2570 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2571 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2572 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2573 2574 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2575 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2576 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2577 } else { 2578 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2579 } 2580 PetscFunctionReturn(0); 2581 } 2582 2583 #undef __FUNCT__ 2584 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2585 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2586 { 2587 PetscErrorCode ierr; 2588 2589 PetscFunctionBegin; 2590 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2591 PetscFunctionReturn(0); 2592 } 2593 2594 #undef __FUNCT__ 2595 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2596 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2597 { 2598 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2599 2600 PetscFunctionBegin; 2601 *array = a->a; 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2607 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2608 { 2609 PetscFunctionBegin; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2615 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2616 { 2617 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2618 PetscErrorCode ierr; 2619 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2620 PetscBLASInt one=1; 2621 2622 PetscFunctionBegin; 2623 if (str == SAME_NONZERO_PATTERN) { 2624 PetscScalar alpha = a; 2625 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2626 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2627 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2628 if (y->xtoy && y->XtoY != X) { 2629 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2630 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2631 } 2632 if (!y->xtoy) { /* get xtoy */ 2633 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2634 y->XtoY = X; 2635 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2636 } 2637 for (i=0; i<x->nz; i++) { 2638 j = 0; 2639 while (j < bs2) { 2640 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2641 j++; 2642 } 2643 } 2644 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2645 } else { 2646 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2647 } 2648 PetscFunctionReturn(0); 2649 } 2650 2651 #undef __FUNCT__ 2652 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2653 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2654 { 2655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2656 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2657 MatScalar *aa = a->a; 2658 2659 PetscFunctionBegin; 2660 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 #undef __FUNCT__ 2665 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2666 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2667 { 2668 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2669 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2670 MatScalar *aa = a->a; 2671 2672 PetscFunctionBegin; 2673 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2674 PetscFunctionReturn(0); 2675 } 2676 2677 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2678 2679 #undef __FUNCT__ 2680 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2681 /* 2682 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2683 */ 2684 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2685 { 2686 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2687 PetscErrorCode ierr; 2688 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2689 PetscInt nz = a->i[m],row,*jj,mr,col; 2690 2691 PetscFunctionBegin; 2692 *nn = n; 2693 if (!ia) PetscFunctionReturn(0); 2694 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2695 else { 2696 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2697 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2698 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2699 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2700 jj = a->j; 2701 for (i=0; i<nz; i++) { 2702 collengths[jj[i]]++; 2703 } 2704 cia[0] = oshift; 2705 for (i=0; i<n; i++) { 2706 cia[i+1] = cia[i] + collengths[i]; 2707 } 2708 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2709 jj = a->j; 2710 for (row=0; row<m; row++) { 2711 mr = a->i[row+1] - a->i[row]; 2712 for (i=0; i<mr; i++) { 2713 col = *jj++; 2714 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2715 } 2716 } 2717 ierr = PetscFree(collengths);CHKERRQ(ierr); 2718 *ia = cia; *ja = cja; 2719 } 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2725 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2726 { 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 if (!ia) PetscFunctionReturn(0); 2731 ierr = PetscFree(*ia);CHKERRQ(ierr); 2732 ierr = PetscFree(*ja);CHKERRQ(ierr); 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2738 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2739 { 2740 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2741 PetscErrorCode ierr; 2742 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2743 PetscScalar dx,*y,*xx,*w3_array; 2744 PetscScalar *vscale_array; 2745 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2746 Vec w1=coloring->w1,w2=coloring->w2,w3; 2747 void *fctx = coloring->fctx; 2748 PetscBool flg = PETSC_FALSE; 2749 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2750 Vec x1_tmp; 2751 2752 PetscFunctionBegin; 2753 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2754 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2755 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2756 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2757 2758 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2759 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2760 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2761 if (flg) { 2762 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2763 } else { 2764 PetscBool assembled; 2765 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2766 if (assembled) { 2767 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2768 } 2769 } 2770 2771 x1_tmp = x1; 2772 if (!coloring->vscale) { 2773 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2774 } 2775 2776 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2777 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2778 } 2779 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2780 2781 /* Set w1 = F(x1) */ 2782 if (!coloring->fset) { 2783 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2784 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2785 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2786 } else { 2787 coloring->fset = PETSC_FALSE; 2788 } 2789 2790 if (!coloring->w3) { 2791 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2793 } 2794 w3 = coloring->w3; 2795 2796 CHKMEMQ; 2797 /* Compute all the local scale factors, including ghost points */ 2798 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2799 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2800 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2801 if (ctype == IS_COLORING_GHOSTED) { 2802 col_start = 0; col_end = N; 2803 } else if (ctype == IS_COLORING_GLOBAL) { 2804 xx = xx - start; 2805 vscale_array = vscale_array - start; 2806 col_start = start; col_end = N + start; 2807 } CHKMEMQ; 2808 for (col=col_start; col<col_end; col++) { 2809 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2810 if (coloring->htype[0] == 'w') { 2811 dx = 1.0 + unorm; 2812 } else { 2813 dx = xx[col]; 2814 } 2815 if (dx == (PetscScalar)0.0) dx = 1.0; 2816 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2817 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2818 dx *= epsilon; 2819 vscale_array[col] = (PetscScalar)1.0/dx; 2820 } CHKMEMQ; 2821 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2822 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2823 if (ctype == IS_COLORING_GLOBAL) { 2824 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2825 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2826 } 2827 CHKMEMQ; 2828 if (coloring->vscaleforrow) { 2829 vscaleforrow = coloring->vscaleforrow; 2830 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2831 2832 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2833 /* 2834 Loop over each color 2835 */ 2836 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2837 for (k=0; k<coloring->ncolors; k++) { 2838 coloring->currentcolor = k; 2839 for (i=0; i<bs; i++) { 2840 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2841 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2842 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2843 /* 2844 Loop over each column associated with color 2845 adding the perturbation to the vector w3. 2846 */ 2847 for (l=0; l<coloring->ncolumns[k]; l++) { 2848 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2849 if (coloring->htype[0] == 'w') { 2850 dx = 1.0 + unorm; 2851 } else { 2852 dx = xx[col]; 2853 } 2854 if (dx == (PetscScalar)0.0) dx = 1.0; 2855 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2856 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2857 dx *= epsilon; 2858 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2859 w3_array[col] += dx; 2860 } 2861 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2862 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2863 2864 /* 2865 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2866 w2 = F(x1 + dx) - F(x1) 2867 */ 2868 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2869 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2870 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2871 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2872 2873 /* 2874 Loop over rows of vector, putting results into Jacobian matrix 2875 */ 2876 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2877 for (l=0; l<coloring->nrows[k]; l++) { 2878 row = bs*coloring->rows[k][l]; /* local row index */ 2879 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2880 for (j=0; j<bs; j++) { 2881 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2882 srows[j] = row + start + j; 2883 } 2884 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2885 } 2886 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2887 } 2888 } /* endof for each color */ 2889 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2890 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2891 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2892 ierr = PetscFree(srows);CHKERRQ(ierr); 2893 2894 coloring->currentcolor = -1; 2895 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2896 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2897 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2898 PetscFunctionReturn(0); 2899 } 2900 2901 /* -------------------------------------------------------------------*/ 2902 static struct _MatOps MatOps_Values = { 2903 MatSetValues_SeqBAIJ, 2904 MatGetRow_SeqBAIJ, 2905 MatRestoreRow_SeqBAIJ, 2906 MatMult_SeqBAIJ_N, 2907 /* 4*/ MatMultAdd_SeqBAIJ_N, 2908 MatMultTranspose_SeqBAIJ, 2909 MatMultTransposeAdd_SeqBAIJ, 2910 0, 2911 0, 2912 0, 2913 /* 10*/ 0, 2914 MatLUFactor_SeqBAIJ, 2915 0, 2916 0, 2917 MatTranspose_SeqBAIJ, 2918 /* 15*/ MatGetInfo_SeqBAIJ, 2919 MatEqual_SeqBAIJ, 2920 MatGetDiagonal_SeqBAIJ, 2921 MatDiagonalScale_SeqBAIJ, 2922 MatNorm_SeqBAIJ, 2923 /* 20*/ 0, 2924 MatAssemblyEnd_SeqBAIJ, 2925 MatSetOption_SeqBAIJ, 2926 MatZeroEntries_SeqBAIJ, 2927 /* 24*/ MatZeroRows_SeqBAIJ, 2928 0, 2929 0, 2930 0, 2931 0, 2932 /* 29*/ MatSetUp_SeqBAIJ, 2933 0, 2934 0, 2935 0, 2936 0, 2937 /* 34*/ MatDuplicate_SeqBAIJ, 2938 0, 2939 0, 2940 MatILUFactor_SeqBAIJ, 2941 0, 2942 /* 39*/ MatAXPY_SeqBAIJ, 2943 MatGetSubMatrices_SeqBAIJ, 2944 MatIncreaseOverlap_SeqBAIJ, 2945 MatGetValues_SeqBAIJ, 2946 MatCopy_SeqBAIJ, 2947 /* 44*/ 0, 2948 MatScale_SeqBAIJ, 2949 0, 2950 0, 2951 MatZeroRowsColumns_SeqBAIJ, 2952 /* 49*/ 0, 2953 MatGetRowIJ_SeqBAIJ, 2954 MatRestoreRowIJ_SeqBAIJ, 2955 MatGetColumnIJ_SeqBAIJ, 2956 MatRestoreColumnIJ_SeqBAIJ, 2957 /* 54*/ MatFDColoringCreate_SeqAIJ, 2958 0, 2959 0, 2960 0, 2961 MatSetValuesBlocked_SeqBAIJ, 2962 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2963 MatDestroy_SeqBAIJ, 2964 MatView_SeqBAIJ, 2965 0, 2966 0, 2967 /* 64*/ 0, 2968 0, 2969 0, 2970 0, 2971 0, 2972 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2973 0, 2974 MatConvert_Basic, 2975 0, 2976 0, 2977 /* 74*/ 0, 2978 MatFDColoringApply_BAIJ, 2979 0, 2980 0, 2981 0, 2982 /* 79*/ 0, 2983 0, 2984 0, 2985 0, 2986 MatLoad_SeqBAIJ, 2987 /* 84*/ 0, 2988 0, 2989 0, 2990 0, 2991 0, 2992 /* 89*/ 0, 2993 0, 2994 0, 2995 0, 2996 0, 2997 /* 94*/ 0, 2998 0, 2999 0, 3000 0, 3001 0, 3002 /* 99*/ 0, 3003 0, 3004 0, 3005 0, 3006 0, 3007 /*104*/ 0, 3008 MatRealPart_SeqBAIJ, 3009 MatImaginaryPart_SeqBAIJ, 3010 0, 3011 0, 3012 /*109*/ 0, 3013 0, 3014 0, 3015 0, 3016 MatMissingDiagonal_SeqBAIJ, 3017 /*114*/ 0, 3018 0, 3019 0, 3020 0, 3021 0, 3022 /*119*/ 0, 3023 0, 3024 MatMultHermitianTranspose_SeqBAIJ, 3025 MatMultHermitianTransposeAdd_SeqBAIJ, 3026 0, 3027 /*124*/ 0, 3028 0, 3029 MatInvertBlockDiagonal_SeqBAIJ 3030 }; 3031 3032 EXTERN_C_BEGIN 3033 #undef __FUNCT__ 3034 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3035 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3036 { 3037 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3038 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3039 PetscErrorCode ierr; 3040 3041 PetscFunctionBegin; 3042 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3043 3044 /* allocate space for values if not already there */ 3045 if (!aij->saved_values) { 3046 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3047 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3048 } 3049 3050 /* copy values over */ 3051 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 EXTERN_C_END 3055 3056 EXTERN_C_BEGIN 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3059 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3060 { 3061 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3062 PetscErrorCode ierr; 3063 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3064 3065 PetscFunctionBegin; 3066 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3067 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3068 3069 /* copy values over */ 3070 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3071 PetscFunctionReturn(0); 3072 } 3073 EXTERN_C_END 3074 3075 EXTERN_C_BEGIN 3076 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3077 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3078 EXTERN_C_END 3079 3080 EXTERN_C_BEGIN 3081 #undef __FUNCT__ 3082 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3083 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3084 { 3085 Mat_SeqBAIJ *b; 3086 PetscErrorCode ierr; 3087 PetscInt i,mbs,nbs,bs2; 3088 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3089 3090 PetscFunctionBegin; 3091 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3092 if (nz == MAT_SKIP_ALLOCATION) { 3093 skipallocation = PETSC_TRUE; 3094 nz = 0; 3095 } 3096 3097 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3098 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3099 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3100 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3101 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3102 3103 B->preallocated = PETSC_TRUE; 3104 3105 mbs = B->rmap->n/bs; 3106 nbs = B->cmap->n/bs; 3107 bs2 = bs*bs; 3108 3109 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3110 3111 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3112 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3113 if (nnz) { 3114 for (i=0; i<mbs; i++) { 3115 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3116 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3117 } 3118 } 3119 3120 b = (Mat_SeqBAIJ*)B->data; 3121 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3122 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3123 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3124 3125 if (!flg) { 3126 switch (bs) { 3127 case 1: 3128 B->ops->mult = MatMult_SeqBAIJ_1; 3129 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3130 B->ops->sor = MatSOR_SeqBAIJ_1; 3131 break; 3132 case 2: 3133 B->ops->mult = MatMult_SeqBAIJ_2; 3134 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3135 B->ops->sor = MatSOR_SeqBAIJ_2; 3136 break; 3137 case 3: 3138 B->ops->mult = MatMult_SeqBAIJ_3; 3139 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3140 B->ops->sor = MatSOR_SeqBAIJ_3; 3141 break; 3142 case 4: 3143 B->ops->mult = MatMult_SeqBAIJ_4; 3144 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3145 B->ops->sor = MatSOR_SeqBAIJ_4; 3146 break; 3147 case 5: 3148 B->ops->mult = MatMult_SeqBAIJ_5; 3149 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3150 B->ops->sor = MatSOR_SeqBAIJ_5; 3151 break; 3152 case 6: 3153 B->ops->mult = MatMult_SeqBAIJ_6; 3154 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3155 B->ops->sor = MatSOR_SeqBAIJ_6; 3156 break; 3157 case 7: 3158 B->ops->mult = MatMult_SeqBAIJ_7; 3159 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3160 B->ops->sor = MatSOR_SeqBAIJ_7; 3161 break; 3162 case 15: 3163 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3164 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3165 B->ops->sor = MatSOR_SeqBAIJ_N; 3166 break; 3167 default: 3168 B->ops->mult = MatMult_SeqBAIJ_N; 3169 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3170 B->ops->sor = MatSOR_SeqBAIJ_N; 3171 break; 3172 } 3173 } 3174 b->mbs = mbs; 3175 b->nbs = nbs; 3176 if (!skipallocation) { 3177 if (!b->imax) { 3178 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3179 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3180 b->free_imax_ilen = PETSC_TRUE; 3181 } 3182 /* b->ilen will count nonzeros in each block row so far. */ 3183 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3184 if (!nnz) { 3185 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3186 else if (nz < 0) nz = 1; 3187 for (i=0; i<mbs; i++) b->imax[i] = nz; 3188 nz = nz*mbs; 3189 } else { 3190 nz = 0; 3191 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3192 } 3193 3194 /* allocate the matrix space */ 3195 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3196 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3197 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3198 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3199 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3200 b->singlemalloc = PETSC_TRUE; 3201 b->i[0] = 0; 3202 for (i=1; i<mbs+1; i++) { 3203 b->i[i] = b->i[i-1] + b->imax[i-1]; 3204 } 3205 b->free_a = PETSC_TRUE; 3206 b->free_ij = PETSC_TRUE; 3207 } else { 3208 b->free_a = PETSC_FALSE; 3209 b->free_ij = PETSC_FALSE; 3210 } 3211 3212 b->bs2 = bs2; 3213 b->mbs = mbs; 3214 b->nz = 0; 3215 b->maxnz = nz; 3216 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3217 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3218 PetscFunctionReturn(0); 3219 } 3220 EXTERN_C_END 3221 3222 EXTERN_C_BEGIN 3223 #undef __FUNCT__ 3224 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3225 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3226 { 3227 PetscInt i,m,nz,nz_max=0,*nnz; 3228 PetscScalar *values=0; 3229 PetscErrorCode ierr; 3230 3231 PetscFunctionBegin; 3232 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3233 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3234 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3235 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3236 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3237 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3238 m = B->rmap->n/bs; 3239 3240 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3241 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3242 for (i=0; i<m; i++) { 3243 nz = ii[i+1]- ii[i]; 3244 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3245 nz_max = PetscMax(nz_max, nz); 3246 nnz[i] = nz; 3247 } 3248 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3249 ierr = PetscFree(nnz);CHKERRQ(ierr); 3250 3251 values = (PetscScalar*)V; 3252 if (!values) { 3253 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3254 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3255 } 3256 for (i=0; i<m; i++) { 3257 PetscInt ncols = ii[i+1] - ii[i]; 3258 const PetscInt *icols = jj + ii[i]; 3259 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3260 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3261 } 3262 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3263 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3266 PetscFunctionReturn(0); 3267 } 3268 EXTERN_C_END 3269 3270 3271 EXTERN_C_BEGIN 3272 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3273 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3274 #if defined(PETSC_HAVE_MUMPS) 3275 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3276 #endif 3277 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3278 EXTERN_C_END 3279 3280 /*MC 3281 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3282 block sparse compressed row format. 3283 3284 Options Database Keys: 3285 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3286 3287 Level: beginner 3288 3289 .seealso: MatCreateSeqBAIJ() 3290 M*/ 3291 3292 EXTERN_C_BEGIN 3293 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3294 EXTERN_C_END 3295 3296 EXTERN_C_BEGIN 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "MatCreate_SeqBAIJ" 3299 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3300 { 3301 PetscErrorCode ierr; 3302 PetscMPIInt size; 3303 Mat_SeqBAIJ *b; 3304 3305 PetscFunctionBegin; 3306 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3307 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3308 3309 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3310 B->data = (void*)b; 3311 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3312 b->row = 0; 3313 b->col = 0; 3314 b->icol = 0; 3315 b->reallocs = 0; 3316 b->saved_values = 0; 3317 3318 b->roworiented = PETSC_TRUE; 3319 b->nonew = 0; 3320 b->diag = 0; 3321 b->solve_work = 0; 3322 b->mult_work = 0; 3323 B->spptr = 0; 3324 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3325 b->keepnonzeropattern = PETSC_FALSE; 3326 b->xtoy = 0; 3327 b->XtoY = 0; 3328 B->same_nonzero = PETSC_FALSE; 3329 3330 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3331 "MatGetFactorAvailable_seqbaij_petsc", 3332 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3333 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3334 "MatGetFactor_seqbaij_petsc", 3335 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3336 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3337 "MatGetFactor_seqbaij_bstrm", 3338 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3339 #if defined(PETSC_HAVE_MUMPS) 3340 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3341 #endif 3342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3343 "MatInvertBlockDiagonal_SeqBAIJ", 3344 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3346 "MatStoreValues_SeqBAIJ", 3347 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3348 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3349 "MatRetrieveValues_SeqBAIJ", 3350 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3352 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3353 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3355 "MatConvert_SeqBAIJ_SeqAIJ", 3356 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3358 "MatConvert_SeqBAIJ_SeqSBAIJ", 3359 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3361 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3362 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3364 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3365 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3367 "MatConvert_SeqBAIJ_SeqBSTRM", 3368 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3370 "MatIsTranspose_SeqBAIJ", 3371 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3372 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3373 PetscFunctionReturn(0); 3374 } 3375 EXTERN_C_END 3376 3377 #undef __FUNCT__ 3378 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3379 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3380 { 3381 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3382 PetscErrorCode ierr; 3383 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3384 3385 PetscFunctionBegin; 3386 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3387 3388 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3389 c->imax = a->imax; 3390 c->ilen = a->ilen; 3391 c->free_imax_ilen = PETSC_FALSE; 3392 } else { 3393 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3394 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3395 for (i=0; i<mbs; i++) { 3396 c->imax[i] = a->imax[i]; 3397 c->ilen[i] = a->ilen[i]; 3398 } 3399 c->free_imax_ilen = PETSC_TRUE; 3400 } 3401 3402 /* allocate the matrix space */ 3403 if (mallocmatspace) { 3404 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3405 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3406 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3407 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3408 c->i = a->i; 3409 c->j = a->j; 3410 c->singlemalloc = PETSC_FALSE; 3411 c->free_a = PETSC_TRUE; 3412 c->free_ij = PETSC_FALSE; 3413 c->parent = A; 3414 C->preallocated = PETSC_TRUE; 3415 C->assembled = PETSC_TRUE; 3416 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3417 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3418 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3419 } else { 3420 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3421 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3422 c->singlemalloc = PETSC_TRUE; 3423 c->free_a = PETSC_TRUE; 3424 c->free_ij = PETSC_TRUE; 3425 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3426 if (mbs > 0) { 3427 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3428 if (cpvalues == MAT_COPY_VALUES) { 3429 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3430 } else { 3431 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3432 } 3433 } 3434 C->preallocated = PETSC_TRUE; 3435 C->assembled = PETSC_TRUE; 3436 } 3437 } 3438 3439 c->roworiented = a->roworiented; 3440 c->nonew = a->nonew; 3441 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3442 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3443 c->bs2 = a->bs2; 3444 c->mbs = a->mbs; 3445 c->nbs = a->nbs; 3446 3447 if (a->diag) { 3448 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3449 c->diag = a->diag; 3450 c->free_diag = PETSC_FALSE; 3451 } else { 3452 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3453 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3454 for (i=0; i<mbs; i++) { 3455 c->diag[i] = a->diag[i]; 3456 } 3457 c->free_diag = PETSC_TRUE; 3458 } 3459 } else c->diag = 0; 3460 c->nz = a->nz; 3461 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3462 c->solve_work = 0; 3463 c->mult_work = 0; 3464 3465 c->compressedrow.use = a->compressedrow.use; 3466 c->compressedrow.nrows = a->compressedrow.nrows; 3467 c->compressedrow.check = a->compressedrow.check; 3468 if (a->compressedrow.use) { 3469 i = a->compressedrow.nrows; 3470 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3471 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3472 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3473 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3474 } else { 3475 c->compressedrow.use = PETSC_FALSE; 3476 c->compressedrow.i = PETSC_NULL; 3477 c->compressedrow.rindex = PETSC_NULL; 3478 } 3479 C->same_nonzero = A->same_nonzero; 3480 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3481 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3482 PetscFunctionReturn(0); 3483 } 3484 3485 #undef __FUNCT__ 3486 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3487 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3488 { 3489 PetscErrorCode ierr; 3490 3491 PetscFunctionBegin; 3492 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3493 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3494 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3495 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3496 PetscFunctionReturn(0); 3497 } 3498 3499 #undef __FUNCT__ 3500 #define __FUNCT__ "MatLoad_SeqBAIJ" 3501 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3502 { 3503 Mat_SeqBAIJ *a; 3504 PetscErrorCode ierr; 3505 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3506 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3507 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3508 PetscInt *masked,nmask,tmp,bs2,ishift; 3509 PetscMPIInt size; 3510 int fd; 3511 PetscScalar *aa; 3512 MPI_Comm comm = ((PetscObject)viewer)->comm; 3513 3514 PetscFunctionBegin; 3515 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3516 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3517 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3518 bs2 = bs*bs; 3519 3520 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3521 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3522 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3523 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3524 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3525 M = header[1]; N = header[2]; nz = header[3]; 3526 3527 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3528 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3529 3530 /* 3531 This code adds extra rows to make sure the number of rows is 3532 divisible by the blocksize 3533 */ 3534 mbs = M/bs; 3535 extra_rows = bs - M + bs*(mbs); 3536 if (extra_rows == bs) extra_rows = 0; 3537 else mbs++; 3538 if (extra_rows) { 3539 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3540 } 3541 3542 /* Set global sizes if not already set */ 3543 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3544 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3545 } else { /* Check if the matrix global sizes are correct */ 3546 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3547 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3548 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3549 } 3550 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3551 } 3552 3553 /* read in row lengths */ 3554 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3555 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3556 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3557 3558 /* read in column indices */ 3559 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3560 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3561 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3562 3563 /* loop over row lengths determining block row lengths */ 3564 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3565 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3566 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3567 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3568 rowcount = 0; 3569 nzcount = 0; 3570 for (i=0; i<mbs; i++) { 3571 nmask = 0; 3572 for (j=0; j<bs; j++) { 3573 kmax = rowlengths[rowcount]; 3574 for (k=0; k<kmax; k++) { 3575 tmp = jj[nzcount++]/bs; 3576 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3577 } 3578 rowcount++; 3579 } 3580 browlengths[i] += nmask; 3581 /* zero out the mask elements we set */ 3582 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3583 } 3584 3585 /* Do preallocation */ 3586 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3587 a = (Mat_SeqBAIJ*)newmat->data; 3588 3589 /* set matrix "i" values */ 3590 a->i[0] = 0; 3591 for (i=1; i<= mbs; i++) { 3592 a->i[i] = a->i[i-1] + browlengths[i-1]; 3593 a->ilen[i-1] = browlengths[i-1]; 3594 } 3595 a->nz = 0; 3596 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3597 3598 /* read in nonzero values */ 3599 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3600 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3601 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3602 3603 /* set "a" and "j" values into matrix */ 3604 nzcount = 0; jcount = 0; 3605 for (i=0; i<mbs; i++) { 3606 nzcountb = nzcount; 3607 nmask = 0; 3608 for (j=0; j<bs; j++) { 3609 kmax = rowlengths[i*bs+j]; 3610 for (k=0; k<kmax; k++) { 3611 tmp = jj[nzcount++]/bs; 3612 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3613 } 3614 } 3615 /* sort the masked values */ 3616 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3617 3618 /* set "j" values into matrix */ 3619 maskcount = 1; 3620 for (j=0; j<nmask; j++) { 3621 a->j[jcount++] = masked[j]; 3622 mask[masked[j]] = maskcount++; 3623 } 3624 /* set "a" values into matrix */ 3625 ishift = bs2*a->i[i]; 3626 for (j=0; j<bs; j++) { 3627 kmax = rowlengths[i*bs+j]; 3628 for (k=0; k<kmax; k++) { 3629 tmp = jj[nzcountb]/bs ; 3630 block = mask[tmp] - 1; 3631 point = jj[nzcountb] - bs*tmp; 3632 idx = ishift + bs2*block + j + bs*point; 3633 a->a[idx] = (MatScalar)aa[nzcountb++]; 3634 } 3635 } 3636 /* zero out the mask elements we set */ 3637 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3638 } 3639 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3640 3641 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3642 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3643 ierr = PetscFree(aa);CHKERRQ(ierr); 3644 ierr = PetscFree(jj);CHKERRQ(ierr); 3645 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3646 3647 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3649 PetscFunctionReturn(0); 3650 } 3651 3652 #undef __FUNCT__ 3653 #define __FUNCT__ "MatCreateSeqBAIJ" 3654 /*@C 3655 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3656 compressed row) format. For good matrix assembly performance the 3657 user should preallocate the matrix storage by setting the parameter nz 3658 (or the array nnz). By setting these parameters accurately, performance 3659 during matrix assembly can be increased by more than a factor of 50. 3660 3661 Collective on MPI_Comm 3662 3663 Input Parameters: 3664 + comm - MPI communicator, set to PETSC_COMM_SELF 3665 . bs - size of block 3666 . m - number of rows 3667 . n - number of columns 3668 . nz - number of nonzero blocks per block row (same for all rows) 3669 - nnz - array containing the number of nonzero blocks in the various block rows 3670 (possibly different for each block row) or PETSC_NULL 3671 3672 Output Parameter: 3673 . A - the matrix 3674 3675 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3676 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3677 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3678 3679 Options Database Keys: 3680 . -mat_no_unroll - uses code that does not unroll the loops in the 3681 block calculations (much slower) 3682 . -mat_block_size - size of the blocks to use 3683 3684 Level: intermediate 3685 3686 Notes: 3687 The number of rows and columns must be divisible by blocksize. 3688 3689 If the nnz parameter is given then the nz parameter is ignored 3690 3691 A nonzero block is any block that as 1 or more nonzeros in it 3692 3693 The block AIJ format is fully compatible with standard Fortran 77 3694 storage. That is, the stored row and column indices can begin at 3695 either one (as in Fortran) or zero. See the users' manual for details. 3696 3697 Specify the preallocated storage with either nz or nnz (not both). 3698 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3699 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3700 matrices. 3701 3702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3703 @*/ 3704 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3705 { 3706 PetscErrorCode ierr; 3707 3708 PetscFunctionBegin; 3709 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3710 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3711 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3712 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3713 PetscFunctionReturn(0); 3714 } 3715 3716 #undef __FUNCT__ 3717 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3718 /*@C 3719 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3720 per row in the matrix. For good matrix assembly performance the 3721 user should preallocate the matrix storage by setting the parameter nz 3722 (or the array nnz). By setting these parameters accurately, performance 3723 during matrix assembly can be increased by more than a factor of 50. 3724 3725 Collective on MPI_Comm 3726 3727 Input Parameters: 3728 + A - the matrix 3729 . bs - size of block 3730 . nz - number of block nonzeros per block row (same for all rows) 3731 - nnz - array containing the number of block nonzeros in the various block rows 3732 (possibly different for each block row) or PETSC_NULL 3733 3734 Options Database Keys: 3735 . -mat_no_unroll - uses code that does not unroll the loops in the 3736 block calculations (much slower) 3737 . -mat_block_size - size of the blocks to use 3738 3739 Level: intermediate 3740 3741 Notes: 3742 If the nnz parameter is given then the nz parameter is ignored 3743 3744 You can call MatGetInfo() to get information on how effective the preallocation was; 3745 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3746 You can also run with the option -info and look for messages with the string 3747 malloc in them to see if additional memory allocation was needed. 3748 3749 The block AIJ format is fully compatible with standard Fortran 77 3750 storage. That is, the stored row and column indices can begin at 3751 either one (as in Fortran) or zero. See the users' manual for details. 3752 3753 Specify the preallocated storage with either nz or nnz (not both). 3754 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3755 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3756 3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3758 @*/ 3759 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3760 { 3761 PetscErrorCode ierr; 3762 3763 PetscFunctionBegin; 3764 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3765 PetscValidType(B,1); 3766 PetscValidLogicalCollectiveInt(B,bs,2); 3767 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3768 PetscFunctionReturn(0); 3769 } 3770 3771 #undef __FUNCT__ 3772 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3773 /*@C 3774 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3775 (the default sequential PETSc format). 3776 3777 Collective on MPI_Comm 3778 3779 Input Parameters: 3780 + A - the matrix 3781 . i - the indices into j for the start of each local row (starts with zero) 3782 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3783 - v - optional values in the matrix 3784 3785 Level: developer 3786 3787 .keywords: matrix, aij, compressed row, sparse 3788 3789 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3790 @*/ 3791 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3792 { 3793 PetscErrorCode ierr; 3794 3795 PetscFunctionBegin; 3796 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3797 PetscValidType(B,1); 3798 PetscValidLogicalCollectiveInt(B,bs,2); 3799 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3800 PetscFunctionReturn(0); 3801 } 3802 3803 3804 #undef __FUNCT__ 3805 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3806 /*@ 3807 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3808 3809 Collective on MPI_Comm 3810 3811 Input Parameters: 3812 + comm - must be an MPI communicator of size 1 3813 . bs - size of block 3814 . m - number of rows 3815 . n - number of columns 3816 . i - row indices 3817 . j - column indices 3818 - a - matrix values 3819 3820 Output Parameter: 3821 . mat - the matrix 3822 3823 Level: advanced 3824 3825 Notes: 3826 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3827 once the matrix is destroyed 3828 3829 You cannot set new nonzero locations into this matrix, that will generate an error. 3830 3831 The i and j indices are 0 based 3832 3833 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3834 3835 3836 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3837 3838 @*/ 3839 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3840 { 3841 PetscErrorCode ierr; 3842 PetscInt ii; 3843 Mat_SeqBAIJ *baij; 3844 3845 PetscFunctionBegin; 3846 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3847 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3848 3849 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3850 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3851 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3852 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3853 baij = (Mat_SeqBAIJ*)(*mat)->data; 3854 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3855 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3856 3857 baij->i = i; 3858 baij->j = j; 3859 baij->a = a; 3860 baij->singlemalloc = PETSC_FALSE; 3861 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3862 baij->free_a = PETSC_FALSE; 3863 baij->free_ij = PETSC_FALSE; 3864 3865 for (ii=0; ii<m; ii++) { 3866 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3867 #if defined(PETSC_USE_DEBUG) 3868 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3869 #endif 3870 } 3871 #if defined(PETSC_USE_DEBUG) 3872 for (ii=0; ii<baij->i[m]; ii++) { 3873 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3874 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3875 } 3876 #endif 3877 3878 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 PetscFunctionReturn(0); 3881 } 3882