1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 166 x[i2] = idiag[0]*s1; 167 idiag += 1; 168 i2 += 1; 169 } 170 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 171 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 172 } 173 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 174 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 175 i2 = 0; 176 mdiag = a->idiag+a->mbs; 177 for (i=0; i<m; i++) { 178 x1 = x[i2]; 179 x[i2] = mdiag[0]*x1; 180 mdiag += 1; 181 i2 += 1; 182 } 183 ierr = PetscLogFlops(m);CHKERRQ(ierr); 184 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 185 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 186 } 187 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 188 idiag = a->idiag+a->mbs - 1; 189 i2 = m - 1; 190 x1 = x[i2]; 191 x[i2] = idiag[0]*x1; 192 idiag -= 1; 193 i2 -= 1; 194 for (i=m-2; i>=0; i--) { 195 v = aa + (diag[i]+1); 196 vi = aj + diag[i] + 1; 197 nz = ai[i+1] - diag[i] - 1; 198 s1 = x[i2]; 199 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 200 x[i2] = idiag[0]*s1; 201 idiag -= 1; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 215 { 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 217 PetscScalar *x,x1,x2,s1,s2; 218 const PetscScalar *b; 219 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 220 PetscErrorCode ierr; 221 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 222 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 223 224 PetscFunctionBegin; 225 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 226 its = its*lits; 227 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 228 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 229 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 230 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 231 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 232 233 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 234 235 diag = a->diag; 236 idiag = a->idiag; 237 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 238 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 239 240 if (flag & SOR_ZERO_INITIAL_GUESS) { 241 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 242 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 243 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 244 i2 = 2; 245 idiag += 4; 246 for (i=1; i<m; i++) { 247 v = aa + 4*ai[i]; 248 vi = aj + ai[i]; 249 nz = diag[i] - ai[i]; 250 s1 = b[i2]; s2 = b[i2+1]; 251 for (j=0; j<nz; j++) { 252 idx = 2*vi[j]; 253 it = 4*j; 254 x1 = x[idx]; x2 = x[1+idx]; 255 s1 -= v[it]*x1 + v[it+2]*x2; 256 s2 -= v[it+1]*x1 + v[it+3]*x2; 257 } 258 x[i2] = idiag[0]*s1 + idiag[2]*s2; 259 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 260 idiag += 4; 261 i2 += 2; 262 } 263 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 264 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 265 } 266 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 267 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 268 i2 = 0; 269 mdiag = a->idiag+4*a->mbs; 270 for (i=0; i<m; i++) { 271 x1 = x[i2]; x2 = x[i2+1]; 272 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 273 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 274 mdiag += 4; 275 i2 += 2; 276 } 277 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 278 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 279 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 280 } 281 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 282 idiag = a->idiag+4*a->mbs - 4; 283 i2 = 2*m - 2; 284 x1 = x[i2]; x2 = x[i2+1]; 285 x[i2] = idiag[0]*x1 + idiag[2]*x2; 286 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 287 idiag -= 4; 288 i2 -= 2; 289 for (i=m-2; i>=0; i--) { 290 v = aa + 4*(diag[i]+1); 291 vi = aj + diag[i] + 1; 292 nz = ai[i+1] - diag[i] - 1; 293 s1 = x[i2]; s2 = x[i2+1]; 294 for (j=0; j<nz; j++) { 295 idx = 2*vi[j]; 296 it = 4*j; 297 x1 = x[idx]; x2 = x[1+idx]; 298 s1 -= v[it]*x1 + v[it+2]*x2; 299 s2 -= v[it+1]*x1 + v[it+3]*x2; 300 } 301 x[i2] = idiag[0]*s1 + idiag[2]*s2; 302 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 303 idiag -= 4; 304 i2 -= 2; 305 } 306 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 307 } 308 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 309 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 310 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 317 { 318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 319 PetscScalar *x,x1,x2,x3,s1,s2,s3; 320 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 321 const PetscScalar *b; 322 PetscErrorCode ierr; 323 PetscInt m = a->mbs,i,i2,nz,idx; 324 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 325 326 PetscFunctionBegin; 327 its = its*lits; 328 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 329 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 330 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 331 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 332 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 333 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 334 335 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 336 337 diag = a->diag; 338 idiag = a->idiag; 339 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 341 342 if (flag & SOR_ZERO_INITIAL_GUESS) { 343 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 344 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 345 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 346 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 347 i2 = 3; 348 idiag += 9; 349 for (i=1; i<m; i++) { 350 v = aa + 9*ai[i]; 351 vi = aj + ai[i]; 352 nz = diag[i] - ai[i]; 353 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 354 while (nz--) { 355 idx = 3*(*vi++); 356 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 357 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 358 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 359 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 360 v += 9; 361 } 362 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 363 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 364 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 365 idiag += 9; 366 i2 += 3; 367 } 368 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 369 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 370 } 371 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 372 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 373 i2 = 0; 374 mdiag = a->idiag+9*a->mbs; 375 for (i=0; i<m; i++) { 376 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 377 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 378 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 379 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 380 mdiag += 9; 381 i2 += 3; 382 } 383 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 384 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 385 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 386 } 387 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 388 idiag = a->idiag+9*a->mbs - 9; 389 i2 = 3*m - 3; 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 391 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 392 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 393 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 394 idiag -= 9; 395 i2 -= 3; 396 for (i=m-2; i>=0; i--) { 397 v = aa + 9*(diag[i]+1); 398 vi = aj + diag[i] + 1; 399 nz = ai[i+1] - diag[i] - 1; 400 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 401 while (nz--) { 402 idx = 3*(*vi++); 403 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 404 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 405 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 406 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 407 v += 9; 408 } 409 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 410 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 411 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 412 idiag -= 9; 413 i2 -= 3; 414 } 415 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 416 } 417 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 418 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 428 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 429 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 430 const PetscScalar *b; 431 PetscErrorCode ierr; 432 PetscInt m = a->mbs,i,i2,nz,idx; 433 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 434 435 PetscFunctionBegin; 436 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 437 its = its*lits; 438 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 439 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 440 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 441 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 442 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 443 444 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 445 446 diag = a->diag; 447 idiag = a->idiag; 448 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 449 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 450 451 if (flag & SOR_ZERO_INITIAL_GUESS) { 452 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 453 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 454 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 455 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 456 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 457 i2 = 4; 458 idiag += 16; 459 for (i=1; i<m; i++) { 460 v = aa + 16*ai[i]; 461 vi = aj + ai[i]; 462 nz = diag[i] - ai[i]; 463 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 464 while (nz--) { 465 idx = 4*(*vi++); 466 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 467 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 468 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 469 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 470 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 471 v += 16; 472 } 473 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 474 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 475 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 476 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 477 idiag += 16; 478 i2 += 4; 479 } 480 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 481 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 482 } 483 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 484 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 485 i2 = 0; 486 mdiag = a->idiag+16*a->mbs; 487 for (i=0; i<m; i++) { 488 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 489 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 490 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 491 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 492 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 493 mdiag += 16; 494 i2 += 4; 495 } 496 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 497 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 498 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 499 } 500 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 501 idiag = a->idiag+16*a->mbs - 16; 502 i2 = 4*m - 4; 503 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 504 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 505 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 506 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 507 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 508 idiag -= 16; 509 i2 -= 4; 510 for (i=m-2; i>=0; i--) { 511 v = aa + 16*(diag[i]+1); 512 vi = aj + diag[i] + 1; 513 nz = ai[i+1] - diag[i] - 1; 514 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 515 while (nz--) { 516 idx = 4*(*vi++); 517 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 518 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 519 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 520 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 521 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 522 v += 16; 523 } 524 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 525 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 526 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 527 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 528 idiag -= 16; 529 i2 -= 4; 530 } 531 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 532 } 533 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 534 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 535 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 545 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 546 const PetscScalar *b; 547 PetscErrorCode ierr; 548 PetscInt m = a->mbs,i,i2,nz,idx; 549 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 550 551 PetscFunctionBegin; 552 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 553 its = its*lits; 554 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 555 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 556 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 557 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 558 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 559 560 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 561 562 diag = a->diag; 563 idiag = a->idiag; 564 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 565 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 566 567 if (flag & SOR_ZERO_INITIAL_GUESS) { 568 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 569 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 570 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 571 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 572 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 573 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 574 i2 = 5; 575 idiag += 25; 576 for (i=1; i<m; i++) { 577 v = aa + 25*ai[i]; 578 vi = aj + ai[i]; 579 nz = diag[i] - ai[i]; 580 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 581 while (nz--) { 582 idx = 5*(*vi++); 583 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 584 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 585 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 586 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 587 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 588 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 589 v += 25; 590 } 591 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 592 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 593 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 594 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 595 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 596 idiag += 25; 597 i2 += 5; 598 } 599 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 600 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 601 } 602 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 603 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 604 i2 = 0; 605 mdiag = a->idiag+25*a->mbs; 606 for (i=0; i<m; i++) { 607 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 608 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 609 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 610 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 611 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 612 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 613 mdiag += 25; 614 i2 += 5; 615 } 616 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 617 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 618 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 619 } 620 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 621 idiag = a->idiag+25*a->mbs - 25; 622 i2 = 5*m - 5; 623 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 624 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 625 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 626 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 627 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 628 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 629 idiag -= 25; 630 i2 -= 5; 631 for (i=m-2; i>=0; i--) { 632 v = aa + 25*(diag[i]+1); 633 vi = aj + diag[i] + 1; 634 nz = ai[i+1] - diag[i] - 1; 635 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 636 while (nz--) { 637 idx = 5*(*vi++); 638 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 639 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 640 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 641 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 642 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 643 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 644 v += 25; 645 } 646 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 647 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 648 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 649 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 650 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 651 idiag -= 25; 652 i2 -= 5; 653 } 654 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 655 } 656 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 665 { 666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 667 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 668 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 669 const PetscScalar *b; 670 PetscErrorCode ierr; 671 PetscInt m = a->mbs,i,i2,nz,idx; 672 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 673 674 PetscFunctionBegin; 675 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 676 its = its*lits; 677 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 678 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 679 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 680 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 681 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 682 683 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 684 685 diag = a->diag; 686 idiag = a->idiag; 687 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 688 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 689 690 if (flag & SOR_ZERO_INITIAL_GUESS) { 691 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 692 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 693 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 694 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 695 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 696 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 697 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 698 i2 = 6; 699 idiag += 36; 700 for (i=1; i<m; i++) { 701 v = aa + 36*ai[i]; 702 vi = aj + ai[i]; 703 nz = diag[i] - ai[i]; 704 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 705 while (nz--) { 706 idx = 6*(*vi++); 707 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 708 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 709 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 710 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 711 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 712 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 713 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 714 v += 36; 715 } 716 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 717 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 718 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 719 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 720 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 721 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 722 idiag += 36; 723 i2 += 6; 724 } 725 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 726 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 727 } 728 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 729 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 730 i2 = 0; 731 mdiag = a->idiag+36*a->mbs; 732 for (i=0; i<m; i++) { 733 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 734 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 735 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 736 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 737 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 738 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 739 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 740 mdiag += 36; 741 i2 += 6; 742 } 743 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 744 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 745 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 746 } 747 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 748 idiag = a->idiag+36*a->mbs - 36; 749 i2 = 6*m - 6; 750 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 751 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 752 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 753 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 754 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 755 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 756 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 757 idiag -= 36; 758 i2 -= 6; 759 for (i=m-2; i>=0; i--) { 760 v = aa + 36*(diag[i]+1); 761 vi = aj + diag[i] + 1; 762 nz = ai[i+1] - diag[i] - 1; 763 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 764 while (nz--) { 765 idx = 6*(*vi++); 766 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 767 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 768 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 769 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 770 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 771 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 772 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 773 v += 36; 774 } 775 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 776 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 777 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 778 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 779 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 780 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 781 idiag -= 36; 782 i2 -= 6; 783 } 784 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 785 } 786 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 787 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 788 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 795 { 796 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 797 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 798 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 799 const PetscScalar *b; 800 PetscErrorCode ierr; 801 PetscInt m = a->mbs,i,i2,nz,idx; 802 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 803 804 PetscFunctionBegin; 805 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 806 its = its*lits; 807 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 808 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 809 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 810 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 811 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 812 813 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 814 815 diag = a->diag; 816 idiag = a->idiag; 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 819 820 if (flag & SOR_ZERO_INITIAL_GUESS) { 821 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 822 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 823 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 824 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 825 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 826 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 827 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 828 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 829 i2 = 7; 830 idiag += 49; 831 for (i=1; i<m; i++) { 832 v = aa + 49*ai[i]; 833 vi = aj + ai[i]; 834 nz = diag[i] - ai[i]; 835 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 836 while (nz--) { 837 idx = 7*(*vi++); 838 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 839 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 840 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 841 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 842 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 843 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 844 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 845 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 846 v += 49; 847 } 848 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 849 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 850 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 851 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 852 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 853 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 854 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 855 idiag += 49; 856 i2 += 7; 857 } 858 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 859 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 860 } 861 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 862 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 863 i2 = 0; 864 mdiag = a->idiag+49*a->mbs; 865 for (i=0; i<m; i++) { 866 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 867 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 868 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 869 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 870 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 871 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 872 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 873 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 874 mdiag += 49; 875 i2 += 7; 876 } 877 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 878 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 879 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 882 idiag = a->idiag+49*a->mbs - 49; 883 i2 = 7*m - 7; 884 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 885 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 886 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 887 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 888 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 889 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 890 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 891 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 892 idiag -= 49; 893 i2 -= 7; 894 for (i=m-2; i>=0; i--) { 895 v = aa + 49*(diag[i]+1); 896 vi = aj + diag[i] + 1; 897 nz = ai[i+1] - diag[i] - 1; 898 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 899 while (nz--) { 900 idx = 7*(*vi++); 901 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 902 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 903 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 904 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 905 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 906 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 907 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 908 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 909 v += 49; 910 } 911 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 912 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 913 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 914 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 915 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 916 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 917 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 918 idiag -= 49; 919 i2 -= 7; 920 } 921 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 922 } 923 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 924 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 925 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 932 { 933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 934 PetscScalar *x,*work,*w,*workt; 935 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 936 const PetscScalar *b; 937 PetscErrorCode ierr; 938 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 939 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 940 941 PetscFunctionBegin; 942 its = its*lits; 943 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 944 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 945 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 946 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 947 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 948 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 949 950 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 951 952 diag = a->diag; 953 idiag = a->idiag; 954 if (!a->mult_work) { 955 k = PetscMax(A->rmap->n,A->cmap->n); 956 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 957 } 958 work = a->mult_work; 959 if (!a->sor_work) { 960 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 961 } 962 w = a->sor_work; 963 964 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 966 967 if (flag & SOR_ZERO_INITIAL_GUESS) { 968 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 969 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 970 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 971 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 972 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 973 i2 = bs; 974 idiag += bs2; 975 for (i=1; i<m; i++) { 976 v = aa + bs2*ai[i]; 977 vi = aj + ai[i]; 978 nz = diag[i] - ai[i]; 979 980 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 981 /* copy all rows of x that are needed into contiguous space */ 982 workt = work; 983 for (j=0; j<nz; j++) { 984 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 985 workt += bs; 986 } 987 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 988 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 989 while (nz--) { 990 idx = N*(*vi++); 991 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 992 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 993 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 994 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 995 v += N2; 996 } */ 997 998 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 999 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1000 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1001 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1002 1003 idiag += bs2; 1004 i2 += bs; 1005 } 1006 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1007 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1008 } 1009 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1010 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1011 i2 = 0; 1012 mdiag = a->idiag+bs2*a->mbs; 1013 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1014 for (i=0; i<m; i++) { 1015 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1016 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1017 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1018 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1019 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1020 1021 mdiag += bs2; 1022 i2 += bs; 1023 } 1024 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1025 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1026 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1027 } 1028 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1029 idiag = a->idiag+bs2*a->mbs - bs2; 1030 i2 = bs*m - bs; 1031 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1033 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1034 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1035 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1036 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1037 idiag -= bs2; 1038 i2 -= bs; 1039 for (i=m-2; i>=0; i--) { 1040 v = aa + bs2*(diag[i]+1); 1041 vi = aj + diag[i] + 1; 1042 nz = ai[i+1] - diag[i] - 1; 1043 1044 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1045 /* copy all rows of x that are needed into contiguous space */ 1046 workt = work; 1047 for (j=0; j<nz; j++) { 1048 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1049 workt += bs; 1050 } 1051 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1052 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1053 while (nz--) { 1054 idx = N*(*vi++); 1055 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1056 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1057 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1058 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1059 v += N2; 1060 } */ 1061 1062 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1063 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1064 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1065 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1066 idiag -= bs2; 1067 i2 -= bs; 1068 } 1069 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1070 } 1071 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 /* 1078 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1079 */ 1080 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1083 #define matsetvaluesblocked4_ matsetvaluesblocked4 1084 #endif 1085 1086 EXTERN_C_BEGIN 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "matsetvaluesblocked4_" 1089 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1090 { 1091 Mat A = *AA; 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1094 PetscInt *ai =a->i,*ailen=a->ilen; 1095 PetscInt *aj =a->j,stepval,lastcol = -1; 1096 const PetscScalar *value = v; 1097 MatScalar *ap,*aa = a->a,*bap; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1101 stepval = (n-1)*4; 1102 for (k=0; k<m; k++) { /* loop over added rows */ 1103 row = im[k]; 1104 rp = aj + ai[row]; 1105 ap = aa + 16*ai[row]; 1106 nrow = ailen[row]; 1107 low = 0; 1108 high = nrow; 1109 for (l=0; l<n; l++) { /* loop over added columns */ 1110 col = in[l]; 1111 if (col <= lastcol) low = 0; 1112 else high = nrow; 1113 lastcol = col; 1114 value = v + k*(stepval+4 + l)*4; 1115 while (high-low > 7) { 1116 t = (low+high)/2; 1117 if (rp[t] > col) high = t; 1118 else low = t; 1119 } 1120 for (i=low; i<high; i++) { 1121 if (rp[i] > col) break; 1122 if (rp[i] == col) { 1123 bap = ap + 16*i; 1124 for (ii=0; ii<4; ii++,value+=stepval) { 1125 for (jj=ii; jj<16; jj+=4) { 1126 bap[jj] += *value++; 1127 } 1128 } 1129 goto noinsert2; 1130 } 1131 } 1132 N = nrow++ - 1; 1133 high++; /* added new column index thus must search to one higher than before */ 1134 /* shift up all the later entries in this row */ 1135 for (ii=N; ii>=i; ii--) { 1136 rp[ii+1] = rp[ii]; 1137 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1138 } 1139 if (N >= i) { 1140 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1141 } 1142 rp[i] = col; 1143 bap = ap + 16*i; 1144 for (ii=0; ii<4; ii++,value+=stepval) { 1145 for (jj=ii; jj<16; jj+=4) { 1146 bap[jj] = *value++; 1147 } 1148 } 1149 noinsert2:; 1150 low = i; 1151 } 1152 ailen[row] = nrow; 1153 } 1154 PetscFunctionReturnVoid(); 1155 } 1156 EXTERN_C_END 1157 1158 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1159 #define matsetvalues4_ MATSETVALUES4 1160 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1161 #define matsetvalues4_ matsetvalues4 1162 #endif 1163 1164 EXTERN_C_BEGIN 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatSetValues4_" 1167 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1168 { 1169 Mat A = *AA; 1170 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1171 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1172 PetscInt *ai=a->i,*ailen=a->ilen; 1173 PetscInt *aj=a->j,brow,bcol; 1174 PetscInt ridx,cidx,lastcol = -1; 1175 MatScalar *ap,value,*aa=a->a,*bap; 1176 1177 PetscFunctionBegin; 1178 for (k=0; k<m; k++) { /* loop over added rows */ 1179 row = im[k]; brow = row/4; 1180 rp = aj + ai[brow]; 1181 ap = aa + 16*ai[brow]; 1182 nrow = ailen[brow]; 1183 low = 0; 1184 high = nrow; 1185 for (l=0; l<n; l++) { /* loop over added columns */ 1186 col = in[l]; bcol = col/4; 1187 ridx = row % 4; cidx = col % 4; 1188 value = v[l + k*n]; 1189 if (col <= lastcol) low = 0; 1190 else high = nrow; 1191 lastcol = col; 1192 while (high-low > 7) { 1193 t = (low+high)/2; 1194 if (rp[t] > bcol) high = t; 1195 else low = t; 1196 } 1197 for (i=low; i<high; i++) { 1198 if (rp[i] > bcol) break; 1199 if (rp[i] == bcol) { 1200 bap = ap + 16*i + 4*cidx + ridx; 1201 *bap += value; 1202 goto noinsert1; 1203 } 1204 } 1205 N = nrow++ - 1; 1206 high++; /* added new column thus must search to one higher than before */ 1207 /* shift up all the later entries in this row */ 1208 for (ii=N; ii>=i; ii--) { 1209 rp[ii+1] = rp[ii]; 1210 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1211 } 1212 if (N>=i) { 1213 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1214 } 1215 rp[i] = bcol; 1216 ap[16*i + 4*cidx + ridx] = value; 1217 noinsert1:; 1218 low = i; 1219 } 1220 ailen[brow] = nrow; 1221 } 1222 PetscFunctionReturnVoid(); 1223 } 1224 EXTERN_C_END 1225 1226 /* 1227 Checks for missing diagonals 1228 */ 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1231 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1232 { 1233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1234 PetscErrorCode ierr; 1235 PetscInt *diag,*jj = a->j,i; 1236 1237 PetscFunctionBegin; 1238 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1239 *missing = PETSC_FALSE; 1240 if (A->rmap->n > 0 && !jj) { 1241 *missing = PETSC_TRUE; 1242 if (d) *d = 0; 1243 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1244 } else { 1245 diag = a->diag; 1246 for (i=0; i<a->mbs; i++) { 1247 if (jj[diag[i]] != i) { 1248 *missing = PETSC_TRUE; 1249 if (d) *d = i; 1250 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1251 break; 1252 } 1253 } 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1260 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1261 { 1262 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1263 PetscErrorCode ierr; 1264 PetscInt i,j,m = a->mbs; 1265 1266 PetscFunctionBegin; 1267 if (!a->diag) { 1268 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1269 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1270 a->free_diag = PETSC_TRUE; 1271 } 1272 for (i=0; i<m; i++) { 1273 a->diag[i] = a->i[i+1]; 1274 for (j=a->i[i]; j<a->i[i+1]; j++) { 1275 if (a->j[j] == i) { 1276 a->diag[i] = j; 1277 break; 1278 } 1279 } 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1287 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1288 { 1289 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1290 PetscErrorCode ierr; 1291 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1292 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1293 1294 PetscFunctionBegin; 1295 *nn = n; 1296 if (!ia) PetscFunctionReturn(0); 1297 if (symmetric) { 1298 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1299 nz = tia[n]; 1300 } else { 1301 tia = a->i; tja = a->j; 1302 } 1303 1304 if (!blockcompressed && bs > 1) { 1305 (*nn) *= bs; 1306 /* malloc & create the natural set of indices */ 1307 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1308 if (n) { 1309 (*ia)[0] = 0; 1310 for (j=1; j<bs; j++) { 1311 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1312 } 1313 } 1314 1315 for (i=1; i<n; i++) { 1316 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1317 for (j=1; j<bs; j++) { 1318 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1319 } 1320 } 1321 if (n) { 1322 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1323 } 1324 1325 if (inja) { 1326 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1327 cnt = 0; 1328 for (i=0; i<n; i++) { 1329 for (j=0; j<bs; j++) { 1330 for (k=tia[i]; k<tia[i+1]; k++) { 1331 for (l=0; l<bs; l++) { 1332 (*ja)[cnt++] = bs*tja[k] + l; 1333 } 1334 } 1335 } 1336 } 1337 } 1338 1339 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1340 ierr = PetscFree(tia);CHKERRQ(ierr); 1341 ierr = PetscFree(tja);CHKERRQ(ierr); 1342 } 1343 } else if (oshift == 1) { 1344 if (symmetric) { 1345 nz = tia[A->rmap->n/bs]; 1346 /* add 1 to i and j indices */ 1347 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1348 *ia = tia; 1349 if (ja) { 1350 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1351 *ja = tja; 1352 } 1353 } else { 1354 nz = a->i[A->rmap->n/bs]; 1355 /* malloc space and add 1 to i and j indices */ 1356 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1357 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1358 if (ja) { 1359 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1360 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1361 } 1362 } 1363 } else { 1364 *ia = tia; 1365 if (ja) *ja = tja; 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1372 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1373 { 1374 PetscErrorCode ierr; 1375 1376 PetscFunctionBegin; 1377 if (!ia) PetscFunctionReturn(0); 1378 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1379 ierr = PetscFree(*ia);CHKERRQ(ierr); 1380 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1387 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1388 { 1389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1390 PetscErrorCode ierr; 1391 1392 PetscFunctionBegin; 1393 #if defined(PETSC_USE_LOG) 1394 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1395 #endif 1396 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1397 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1398 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1399 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1400 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1401 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1402 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1403 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1404 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1405 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1406 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1407 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1408 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1409 1410 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1411 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1412 ierr = PetscFree(A->data);CHKERRQ(ierr); 1413 1414 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1415 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr); 1416 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1417 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1418 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr); 1419 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr); 1420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr); 1421 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1422 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1423 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr); 1424 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1430 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 switch (op) { 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 break; 1440 case MAT_KEEP_NONZERO_PATTERN: 1441 a->keepnonzeropattern = flg; 1442 break; 1443 case MAT_NEW_NONZERO_LOCATIONS: 1444 a->nonew = (flg ? 0 : 1); 1445 break; 1446 case MAT_NEW_NONZERO_LOCATION_ERR: 1447 a->nonew = (flg ? -1 : 0); 1448 break; 1449 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1450 a->nonew = (flg ? -2 : 0); 1451 break; 1452 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1453 a->nounused = (flg ? -1 : 0); 1454 break; 1455 case MAT_CHECK_COMPRESSED_ROW: 1456 a->compressedrow.check = flg; 1457 break; 1458 case MAT_NEW_DIAGONALS: 1459 case MAT_IGNORE_OFF_PROC_ENTRIES: 1460 case MAT_USE_HASH_TABLE: 1461 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1462 break; 1463 case MAT_SPD: 1464 case MAT_SYMMETRIC: 1465 case MAT_STRUCTURALLY_SYMMETRIC: 1466 case MAT_HERMITIAN: 1467 case MAT_SYMMETRY_ETERNAL: 1468 /* These options are handled directly by MatSetOption() */ 1469 break; 1470 default: 1471 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1478 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1479 { 1480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1481 PetscErrorCode ierr; 1482 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1483 MatScalar *aa,*aa_i; 1484 PetscScalar *v_i; 1485 1486 PetscFunctionBegin; 1487 bs = A->rmap->bs; 1488 ai = a->i; 1489 aj = a->j; 1490 aa = a->a; 1491 bs2 = a->bs2; 1492 1493 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1494 1495 bn = row/bs; /* Block number */ 1496 bp = row % bs; /* Block Position */ 1497 M = ai[bn+1] - ai[bn]; 1498 *nz = bs*M; 1499 1500 if (v) { 1501 *v = 0; 1502 if (*nz) { 1503 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1504 for (i=0; i<M; i++) { /* for each block in the block row */ 1505 v_i = *v + i*bs; 1506 aa_i = aa + bs2*(ai[bn] + i); 1507 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1508 } 1509 } 1510 } 1511 1512 if (idx) { 1513 *idx = 0; 1514 if (*nz) { 1515 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1516 for (i=0; i<M; i++) { /* for each block in the block row */ 1517 idx_i = *idx + i*bs; 1518 itmp = bs*aj[ai[bn] + i]; 1519 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1520 } 1521 } 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1528 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1529 { 1530 PetscErrorCode ierr; 1531 1532 PetscFunctionBegin; 1533 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1534 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1535 PetscFunctionReturn(0); 1536 } 1537 1538 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1542 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1543 { 1544 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1545 Mat C; 1546 PetscErrorCode ierr; 1547 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1548 PetscInt *rows,*cols,bs2=a->bs2; 1549 MatScalar *array; 1550 1551 PetscFunctionBegin; 1552 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1553 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1554 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1555 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1556 1557 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1558 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1559 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1560 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1561 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1562 ierr = PetscFree(col);CHKERRQ(ierr); 1563 } else { 1564 C = *B; 1565 } 1566 1567 array = a->a; 1568 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1569 for (i=0; i<mbs; i++) { 1570 cols[0] = i*bs; 1571 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1572 len = ai[i+1] - ai[i]; 1573 for (j=0; j<len; j++) { 1574 rows[0] = (*aj++)*bs; 1575 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1576 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1577 array += bs2; 1578 } 1579 } 1580 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1581 1582 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1584 1585 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1586 *B = C; 1587 } else { 1588 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1589 } 1590 PetscFunctionReturn(0); 1591 } 1592 1593 EXTERN_C_BEGIN 1594 #undef __FUNCT__ 1595 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1596 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1597 { 1598 PetscErrorCode ierr; 1599 Mat Btrans; 1600 1601 PetscFunctionBegin; 1602 *f = PETSC_FALSE; 1603 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1604 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1605 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 EXTERN_C_END 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1612 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1613 { 1614 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1615 PetscErrorCode ierr; 1616 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1617 int fd; 1618 PetscScalar *aa; 1619 FILE *file; 1620 1621 PetscFunctionBegin; 1622 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1623 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1624 col_lens[0] = MAT_FILE_CLASSID; 1625 1626 col_lens[1] = A->rmap->N; 1627 col_lens[2] = A->cmap->n; 1628 col_lens[3] = a->nz*bs2; 1629 1630 /* store lengths of each row and write (including header) to file */ 1631 count = 0; 1632 for (i=0; i<a->mbs; i++) { 1633 for (j=0; j<bs; j++) { 1634 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1635 } 1636 } 1637 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1638 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1639 1640 /* store column indices (zero start index) */ 1641 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1642 count = 0; 1643 for (i=0; i<a->mbs; i++) { 1644 for (j=0; j<bs; j++) { 1645 for (k=a->i[i]; k<a->i[i+1]; k++) { 1646 for (l=0; l<bs; l++) { 1647 jj[count++] = bs*a->j[k] + l; 1648 } 1649 } 1650 } 1651 } 1652 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1653 ierr = PetscFree(jj);CHKERRQ(ierr); 1654 1655 /* store nonzero values */ 1656 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1657 count = 0; 1658 for (i=0; i<a->mbs; i++) { 1659 for (j=0; j<bs; j++) { 1660 for (k=a->i[i]; k<a->i[i+1]; k++) { 1661 for (l=0; l<bs; l++) { 1662 aa[count++] = a->a[bs2*k + l*bs + j]; 1663 } 1664 } 1665 } 1666 } 1667 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1668 ierr = PetscFree(aa);CHKERRQ(ierr); 1669 1670 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1671 if (file) { 1672 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1673 } 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1679 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1680 { 1681 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1682 PetscErrorCode ierr; 1683 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1684 PetscViewerFormat format; 1685 1686 PetscFunctionBegin; 1687 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1688 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1689 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1690 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1691 Mat aij; 1692 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1693 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1694 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1695 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1696 PetscFunctionReturn(0); 1697 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1698 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1699 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1700 for (i=0; i<a->mbs; i++) { 1701 for (j=0; j<bs; j++) { 1702 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1703 for (k=a->i[i]; k<a->i[i+1]; k++) { 1704 for (l=0; l<bs; l++) { 1705 #if defined(PETSC_USE_COMPLEX) 1706 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1707 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1708 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1709 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1710 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1711 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1712 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1713 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1714 } 1715 #else 1716 if (a->a[bs2*k + l*bs + j] != 0.0) { 1717 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1718 } 1719 #endif 1720 } 1721 } 1722 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1723 } 1724 } 1725 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1726 } else { 1727 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1728 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1729 for (i=0; i<a->mbs; i++) { 1730 for (j=0; j<bs; j++) { 1731 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1732 for (k=a->i[i]; k<a->i[i+1]; k++) { 1733 for (l=0; l<bs; l++) { 1734 #if defined(PETSC_USE_COMPLEX) 1735 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1736 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1737 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1738 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1739 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1740 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1741 } else { 1742 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1743 } 1744 #else 1745 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1746 #endif 1747 } 1748 } 1749 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1750 } 1751 } 1752 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1753 } 1754 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 1758 #include <petscdraw.h> 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1761 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1762 { 1763 Mat A = (Mat) Aa; 1764 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1765 PetscErrorCode ierr; 1766 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1767 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1768 MatScalar *aa; 1769 PetscViewer viewer; 1770 PetscViewerFormat format; 1771 1772 PetscFunctionBegin; 1773 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1774 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1775 1776 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1777 1778 /* loop over matrix elements drawing boxes */ 1779 1780 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1781 color = PETSC_DRAW_BLUE; 1782 for (i=0,row=0; i<mbs; i++,row+=bs) { 1783 for (j=a->i[i]; j<a->i[i+1]; j++) { 1784 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1785 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1786 aa = a->a + j*bs2; 1787 for (k=0; k<bs; k++) { 1788 for (l=0; l<bs; l++) { 1789 if (PetscRealPart(*aa++) >= 0.) continue; 1790 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1791 } 1792 } 1793 } 1794 } 1795 color = PETSC_DRAW_CYAN; 1796 for (i=0,row=0; i<mbs; i++,row+=bs) { 1797 for (j=a->i[i]; j<a->i[i+1]; j++) { 1798 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1799 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1800 aa = a->a + j*bs2; 1801 for (k=0; k<bs; k++) { 1802 for (l=0; l<bs; l++) { 1803 if (PetscRealPart(*aa++) != 0.) continue; 1804 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1805 } 1806 } 1807 } 1808 } 1809 color = PETSC_DRAW_RED; 1810 for (i=0,row=0; i<mbs; i++,row+=bs) { 1811 for (j=a->i[i]; j<a->i[i+1]; j++) { 1812 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1813 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1814 aa = a->a + j*bs2; 1815 for (k=0; k<bs; k++) { 1816 for (l=0; l<bs; l++) { 1817 if (PetscRealPart(*aa++) <= 0.) continue; 1818 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1819 } 1820 } 1821 } 1822 } 1823 } else { 1824 /* use contour shading to indicate magnitude of values */ 1825 /* first determine max of all nonzero values */ 1826 PetscDraw popup; 1827 PetscReal scale,maxv = 0.0; 1828 1829 for (i=0; i<a->nz*a->bs2; i++) { 1830 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1831 } 1832 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1833 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1834 if (popup) { 1835 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1836 } 1837 for (i=0,row=0; i<mbs; i++,row+=bs) { 1838 for (j=a->i[i]; j<a->i[i+1]; j++) { 1839 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1840 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1841 aa = a->a + j*bs2; 1842 for (k=0; k<bs; k++) { 1843 for (l=0; l<bs; l++) { 1844 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1845 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1846 } 1847 } 1848 } 1849 } 1850 } 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1856 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1857 { 1858 PetscErrorCode ierr; 1859 PetscReal xl,yl,xr,yr,w,h; 1860 PetscDraw draw; 1861 PetscBool isnull; 1862 1863 PetscFunctionBegin; 1864 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1865 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1866 1867 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1868 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1869 xr += w; yr += h; xl = -w; yl = -h; 1870 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1871 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1872 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatView_SeqBAIJ" 1878 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1879 { 1880 PetscErrorCode ierr; 1881 PetscBool iascii,isbinary,isdraw; 1882 1883 PetscFunctionBegin; 1884 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1885 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1886 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1887 if (iascii) { 1888 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1889 } else if (isbinary) { 1890 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1891 } else if (isdraw) { 1892 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1893 } else { 1894 Mat B; 1895 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1896 ierr = MatView(B,viewer);CHKERRQ(ierr); 1897 ierr = MatDestroy(&B);CHKERRQ(ierr); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1905 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1906 { 1907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1908 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1909 PetscInt *ai = a->i,*ailen = a->ilen; 1910 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1911 MatScalar *ap,*aa = a->a; 1912 1913 PetscFunctionBegin; 1914 for (k=0; k<m; k++) { /* loop over rows */ 1915 row = im[k]; brow = row/bs; 1916 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1917 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1918 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1919 nrow = ailen[brow]; 1920 for (l=0; l<n; l++) { /* loop over columns */ 1921 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1922 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1923 col = in[l]; 1924 bcol = col/bs; 1925 cidx = col%bs; 1926 ridx = row%bs; 1927 high = nrow; 1928 low = 0; /* assume unsorted */ 1929 while (high-low > 5) { 1930 t = (low+high)/2; 1931 if (rp[t] > bcol) high = t; 1932 else low = t; 1933 } 1934 for (i=low; i<high; i++) { 1935 if (rp[i] > bcol) break; 1936 if (rp[i] == bcol) { 1937 *v++ = ap[bs2*i+bs*cidx+ridx]; 1938 goto finished; 1939 } 1940 } 1941 *v++ = 0.0; 1942 finished:; 1943 } 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1950 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1951 { 1952 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1953 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1954 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1955 PetscErrorCode ierr; 1956 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1957 PetscBool roworiented=a->roworiented; 1958 const PetscScalar *value = v; 1959 MatScalar *ap,*aa = a->a,*bap; 1960 1961 PetscFunctionBegin; 1962 if (roworiented) { 1963 stepval = (n-1)*bs; 1964 } else { 1965 stepval = (m-1)*bs; 1966 } 1967 for (k=0; k<m; k++) { /* loop over added rows */ 1968 row = im[k]; 1969 if (row < 0) continue; 1970 #if defined(PETSC_USE_DEBUG) 1971 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1972 #endif 1973 rp = aj + ai[row]; 1974 ap = aa + bs2*ai[row]; 1975 rmax = imax[row]; 1976 nrow = ailen[row]; 1977 low = 0; 1978 high = nrow; 1979 for (l=0; l<n; l++) { /* loop over added columns */ 1980 if (in[l] < 0) continue; 1981 #if defined(PETSC_USE_DEBUG) 1982 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1983 #endif 1984 col = in[l]; 1985 if (roworiented) { 1986 value = v + (k*(stepval+bs) + l)*bs; 1987 } else { 1988 value = v + (l*(stepval+bs) + k)*bs; 1989 } 1990 if (col <= lastcol) low = 0; 1991 else high = nrow; 1992 lastcol = col; 1993 while (high-low > 7) { 1994 t = (low+high)/2; 1995 if (rp[t] > col) high = t; 1996 else low = t; 1997 } 1998 for (i=low; i<high; i++) { 1999 if (rp[i] > col) break; 2000 if (rp[i] == col) { 2001 bap = ap + bs2*i; 2002 if (roworiented) { 2003 if (is == ADD_VALUES) { 2004 for (ii=0; ii<bs; ii++,value+=stepval) { 2005 for (jj=ii; jj<bs2; jj+=bs) { 2006 bap[jj] += *value++; 2007 } 2008 } 2009 } else { 2010 for (ii=0; ii<bs; ii++,value+=stepval) { 2011 for (jj=ii; jj<bs2; jj+=bs) { 2012 bap[jj] = *value++; 2013 } 2014 } 2015 } 2016 } else { 2017 if (is == ADD_VALUES) { 2018 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2019 for (jj=0; jj<bs; jj++) { 2020 bap[jj] += value[jj]; 2021 } 2022 bap += bs; 2023 } 2024 } else { 2025 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2026 for (jj=0; jj<bs; jj++) { 2027 bap[jj] = value[jj]; 2028 } 2029 bap += bs; 2030 } 2031 } 2032 } 2033 goto noinsert2; 2034 } 2035 } 2036 if (nonew == 1) goto noinsert2; 2037 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2038 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2039 N = nrow++ - 1; high++; 2040 /* shift up all the later entries in this row */ 2041 for (ii=N; ii>=i; ii--) { 2042 rp[ii+1] = rp[ii]; 2043 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2044 } 2045 if (N >= i) { 2046 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2047 } 2048 rp[i] = col; 2049 bap = ap + bs2*i; 2050 if (roworiented) { 2051 for (ii=0; ii<bs; ii++,value+=stepval) { 2052 for (jj=ii; jj<bs2; jj+=bs) { 2053 bap[jj] = *value++; 2054 } 2055 } 2056 } else { 2057 for (ii=0; ii<bs; ii++,value+=stepval) { 2058 for (jj=0; jj<bs; jj++) { 2059 *bap++ = *value++; 2060 } 2061 } 2062 } 2063 noinsert2:; 2064 low = i; 2065 } 2066 ailen[row] = nrow; 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2073 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2074 { 2075 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2076 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2077 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2078 PetscErrorCode ierr; 2079 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2080 MatScalar *aa = a->a,*ap; 2081 PetscReal ratio=0.6; 2082 2083 PetscFunctionBegin; 2084 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2085 2086 if (m) rmax = ailen[0]; 2087 for (i=1; i<mbs; i++) { 2088 /* move each row back by the amount of empty slots (fshift) before it*/ 2089 fshift += imax[i-1] - ailen[i-1]; 2090 rmax = PetscMax(rmax,ailen[i]); 2091 if (fshift) { 2092 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2093 N = ailen[i]; 2094 for (j=0; j<N; j++) { 2095 ip[j-fshift] = ip[j]; 2096 2097 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2098 } 2099 } 2100 ai[i] = ai[i-1] + ailen[i-1]; 2101 } 2102 if (mbs) { 2103 fshift += imax[mbs-1] - ailen[mbs-1]; 2104 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2105 } 2106 /* reset ilen and imax for each row */ 2107 for (i=0; i<mbs; i++) { 2108 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2109 } 2110 a->nz = ai[mbs]; 2111 2112 /* diagonals may have moved, so kill the diagonal pointers */ 2113 a->idiagvalid = PETSC_FALSE; 2114 if (fshift && a->diag) { 2115 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2116 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2117 a->diag = 0; 2118 } 2119 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2120 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2121 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2122 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2123 2124 A->info.mallocs += a->reallocs; 2125 a->reallocs = 0; 2126 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2127 2128 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2129 2130 A->same_nonzero = PETSC_TRUE; 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /* 2135 This function returns an array of flags which indicate the locations of contiguous 2136 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2137 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2138 Assume: sizes should be long enough to hold all the values. 2139 */ 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2142 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2143 { 2144 PetscInt i,j,k,row; 2145 PetscBool flg; 2146 2147 PetscFunctionBegin; 2148 for (i=0,j=0; i<n; j++) { 2149 row = idx[i]; 2150 if (row%bs!=0) { /* Not the begining of a block */ 2151 sizes[j] = 1; 2152 i++; 2153 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2154 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2155 i++; 2156 } else { /* Begining of the block, so check if the complete block exists */ 2157 flg = PETSC_TRUE; 2158 for (k=1; k<bs; k++) { 2159 if (row+k != idx[i+k]) { /* break in the block */ 2160 flg = PETSC_FALSE; 2161 break; 2162 } 2163 } 2164 if (flg) { /* No break in the bs */ 2165 sizes[j] = bs; 2166 i += bs; 2167 } else { 2168 sizes[j] = 1; 2169 i++; 2170 } 2171 } 2172 } 2173 *bs_max = j; 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2179 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2180 { 2181 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2182 PetscErrorCode ierr; 2183 PetscInt i,j,k,count,*rows; 2184 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2185 PetscScalar zero = 0.0; 2186 MatScalar *aa; 2187 const PetscScalar *xx; 2188 PetscScalar *bb; 2189 2190 PetscFunctionBegin; 2191 /* fix right hand side if needed */ 2192 if (x && b) { 2193 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2194 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2195 for (i=0; i<is_n; i++) { 2196 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2197 } 2198 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2199 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2200 } 2201 2202 /* Make a copy of the IS and sort it */ 2203 /* allocate memory for rows,sizes */ 2204 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2205 2206 /* copy IS values to rows, and sort them */ 2207 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2208 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2209 2210 if (baij->keepnonzeropattern) { 2211 for (i=0; i<is_n; i++) sizes[i] = 1; 2212 bs_max = is_n; 2213 A->same_nonzero = PETSC_TRUE; 2214 } else { 2215 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2216 2217 A->same_nonzero = PETSC_FALSE; 2218 } 2219 2220 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2221 row = rows[j]; 2222 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2223 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2224 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2225 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2226 if (diag != (PetscScalar)0.0) { 2227 if (baij->ilen[row/bs] > 0) { 2228 baij->ilen[row/bs] = 1; 2229 baij->j[baij->i[row/bs]] = row/bs; 2230 2231 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2232 } 2233 /* Now insert all the diagonal values for this bs */ 2234 for (k=0; k<bs; k++) { 2235 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2236 } 2237 } else { /* (diag == 0.0) */ 2238 baij->ilen[row/bs] = 0; 2239 } /* end (diag == 0.0) */ 2240 } else { /* (sizes[i] != bs) */ 2241 #if defined(PETSC_USE_DEBUG) 2242 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2243 #endif 2244 for (k=0; k<count; k++) { 2245 aa[0] = zero; 2246 aa += bs; 2247 } 2248 if (diag != (PetscScalar)0.0) { 2249 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2250 } 2251 } 2252 } 2253 2254 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2255 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2261 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2262 { 2263 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2264 PetscErrorCode ierr; 2265 PetscInt i,j,k,count; 2266 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2267 PetscScalar zero = 0.0; 2268 MatScalar *aa; 2269 const PetscScalar *xx; 2270 PetscScalar *bb; 2271 PetscBool *zeroed,vecs = PETSC_FALSE; 2272 2273 PetscFunctionBegin; 2274 /* fix right hand side if needed */ 2275 if (x && b) { 2276 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2277 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2278 vecs = PETSC_TRUE; 2279 } 2280 A->same_nonzero = PETSC_TRUE; 2281 2282 /* zero the columns */ 2283 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2284 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2285 for (i=0; i<is_n; i++) { 2286 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2287 zeroed[is_idx[i]] = PETSC_TRUE; 2288 } 2289 for (i=0; i<A->rmap->N; i++) { 2290 if (!zeroed[i]) { 2291 row = i/bs; 2292 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2293 for (k=0; k<bs; k++) { 2294 col = bs*baij->j[j] + k; 2295 if (zeroed[col]) { 2296 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2297 if (vecs) bb[i] -= aa[0]*xx[col]; 2298 aa[0] = 0.0; 2299 } 2300 } 2301 } 2302 } else if (vecs) bb[i] = diag*xx[i]; 2303 } 2304 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2305 if (vecs) { 2306 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2307 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2308 } 2309 2310 /* zero the rows */ 2311 for (i=0; i<is_n; i++) { 2312 row = is_idx[i]; 2313 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2314 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2315 for (k=0; k<count; k++) { 2316 aa[0] = zero; 2317 aa += bs; 2318 } 2319 if (diag != (PetscScalar)0.0) { 2320 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2321 } 2322 } 2323 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2329 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2330 { 2331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2332 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2333 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2334 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2335 PetscErrorCode ierr; 2336 PetscInt ridx,cidx,bs2=a->bs2; 2337 PetscBool roworiented=a->roworiented; 2338 MatScalar *ap,value,*aa=a->a,*bap; 2339 2340 PetscFunctionBegin; 2341 if (v) PetscValidScalarPointer(v,6); 2342 for (k=0; k<m; k++) { /* loop over added rows */ 2343 row = im[k]; 2344 brow = row/bs; 2345 if (row < 0) continue; 2346 #if defined(PETSC_USE_DEBUG) 2347 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2348 #endif 2349 rp = aj + ai[brow]; 2350 ap = aa + bs2*ai[brow]; 2351 rmax = imax[brow]; 2352 nrow = ailen[brow]; 2353 low = 0; 2354 high = nrow; 2355 for (l=0; l<n; l++) { /* loop over added columns */ 2356 if (in[l] < 0) continue; 2357 #if defined(PETSC_USE_DEBUG) 2358 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2359 #endif 2360 col = in[l]; bcol = col/bs; 2361 ridx = row % bs; cidx = col % bs; 2362 if (roworiented) { 2363 value = v[l + k*n]; 2364 } else { 2365 value = v[k + l*m]; 2366 } 2367 if (col <= lastcol) low = 0; else high = nrow; 2368 lastcol = col; 2369 while (high-low > 7) { 2370 t = (low+high)/2; 2371 if (rp[t] > bcol) high = t; 2372 else low = t; 2373 } 2374 for (i=low; i<high; i++) { 2375 if (rp[i] > bcol) break; 2376 if (rp[i] == bcol) { 2377 bap = ap + bs2*i + bs*cidx + ridx; 2378 if (is == ADD_VALUES) *bap += value; 2379 else *bap = value; 2380 goto noinsert1; 2381 } 2382 } 2383 if (nonew == 1) goto noinsert1; 2384 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2385 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2386 N = nrow++ - 1; high++; 2387 /* shift up all the later entries in this row */ 2388 for (ii=N; ii>=i; ii--) { 2389 rp[ii+1] = rp[ii]; 2390 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2391 } 2392 if (N>=i) { 2393 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2394 } 2395 rp[i] = bcol; 2396 ap[bs2*i + bs*cidx + ridx] = value; 2397 a->nz++; 2398 noinsert1:; 2399 low = i; 2400 } 2401 ailen[brow] = nrow; 2402 } 2403 A->same_nonzero = PETSC_FALSE; 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2409 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2410 { 2411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2412 Mat outA; 2413 PetscErrorCode ierr; 2414 PetscBool row_identity,col_identity; 2415 2416 PetscFunctionBegin; 2417 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2418 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2419 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2420 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2421 2422 outA = inA; 2423 inA->factortype = MAT_FACTOR_LU; 2424 2425 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2426 2427 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2428 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2429 a->row = row; 2430 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2431 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2432 a->col = col; 2433 2434 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2435 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2436 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2437 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2438 2439 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2440 if (!a->solve_work) { 2441 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2442 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2443 } 2444 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2445 PetscFunctionReturn(0); 2446 } 2447 2448 EXTERN_C_BEGIN 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2451 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2452 { 2453 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2454 PetscInt i,nz,mbs; 2455 2456 PetscFunctionBegin; 2457 nz = baij->maxnz; 2458 mbs = baij->mbs; 2459 for (i=0; i<nz; i++) { 2460 baij->j[i] = indices[i]; 2461 } 2462 baij->nz = nz; 2463 for (i=0; i<mbs; i++) { 2464 baij->ilen[i] = baij->imax[i]; 2465 } 2466 PetscFunctionReturn(0); 2467 } 2468 EXTERN_C_END 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2472 /*@ 2473 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2474 in the matrix. 2475 2476 Input Parameters: 2477 + mat - the SeqBAIJ matrix 2478 - indices - the column indices 2479 2480 Level: advanced 2481 2482 Notes: 2483 This can be called if you have precomputed the nonzero structure of the 2484 matrix and want to provide it to the matrix object to improve the performance 2485 of the MatSetValues() operation. 2486 2487 You MUST have set the correct numbers of nonzeros per row in the call to 2488 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2489 2490 MUST be called before any calls to MatSetValues(); 2491 2492 @*/ 2493 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2494 { 2495 PetscErrorCode ierr; 2496 2497 PetscFunctionBegin; 2498 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2499 PetscValidPointer(indices,2); 2500 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2506 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2507 { 2508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2509 PetscErrorCode ierr; 2510 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2511 PetscReal atmp; 2512 PetscScalar *x,zero = 0.0; 2513 MatScalar *aa; 2514 PetscInt ncols,brow,krow,kcol; 2515 2516 PetscFunctionBegin; 2517 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2518 bs = A->rmap->bs; 2519 aa = a->a; 2520 ai = a->i; 2521 aj = a->j; 2522 mbs = a->mbs; 2523 2524 ierr = VecSet(v,zero);CHKERRQ(ierr); 2525 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2526 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2527 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2528 for (i=0; i<mbs; i++) { 2529 ncols = ai[1] - ai[0]; ai++; 2530 brow = bs*i; 2531 for (j=0; j<ncols; j++) { 2532 for (kcol=0; kcol<bs; kcol++) { 2533 for (krow=0; krow<bs; krow++) { 2534 atmp = PetscAbsScalar(*aa);aa++; 2535 row = brow + krow; /* row index */ 2536 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2537 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2538 } 2539 } 2540 aj++; 2541 } 2542 } 2543 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatCopy_SeqBAIJ" 2549 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2550 { 2551 PetscErrorCode ierr; 2552 2553 PetscFunctionBegin; 2554 /* If the two matrices have the same copy implementation, use fast copy. */ 2555 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2556 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2557 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2558 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2559 2560 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2561 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2562 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2563 } else { 2564 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2565 } 2566 PetscFunctionReturn(0); 2567 } 2568 2569 #undef __FUNCT__ 2570 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2571 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2572 { 2573 PetscErrorCode ierr; 2574 2575 PetscFunctionBegin; 2576 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 #undef __FUNCT__ 2581 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2582 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2583 { 2584 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2585 2586 PetscFunctionBegin; 2587 *array = a->a; 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2593 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2594 { 2595 PetscFunctionBegin; 2596 PetscFunctionReturn(0); 2597 } 2598 2599 #undef __FUNCT__ 2600 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2601 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2602 { 2603 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2604 PetscErrorCode ierr; 2605 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2606 PetscBLASInt one=1; 2607 2608 PetscFunctionBegin; 2609 if (str == SAME_NONZERO_PATTERN) { 2610 PetscScalar alpha = a; 2611 PetscBLASInt bnz; 2612 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2613 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2614 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2615 if (y->xtoy && y->XtoY != X) { 2616 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2617 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2618 } 2619 if (!y->xtoy) { /* get xtoy */ 2620 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2621 y->XtoY = X; 2622 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2623 } 2624 for (i=0; i<x->nz; i++) { 2625 j = 0; 2626 while (j < bs2) { 2627 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2628 j++; 2629 } 2630 } 2631 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2632 } else { 2633 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2634 } 2635 PetscFunctionReturn(0); 2636 } 2637 2638 #undef __FUNCT__ 2639 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2640 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2641 { 2642 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2643 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2644 MatScalar *aa = a->a; 2645 2646 PetscFunctionBegin; 2647 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2648 PetscFunctionReturn(0); 2649 } 2650 2651 #undef __FUNCT__ 2652 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2653 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2654 { 2655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2656 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2657 MatScalar *aa = a->a; 2658 2659 PetscFunctionBegin; 2660 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2665 2666 #undef __FUNCT__ 2667 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2668 /* 2669 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2670 */ 2671 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2672 { 2673 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2674 PetscErrorCode ierr; 2675 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2676 PetscInt nz = a->i[m],row,*jj,mr,col; 2677 2678 PetscFunctionBegin; 2679 *nn = n; 2680 if (!ia) PetscFunctionReturn(0); 2681 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2682 else { 2683 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2684 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2685 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2686 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2687 jj = a->j; 2688 for (i=0; i<nz; i++) { 2689 collengths[jj[i]]++; 2690 } 2691 cia[0] = oshift; 2692 for (i=0; i<n; i++) { 2693 cia[i+1] = cia[i] + collengths[i]; 2694 } 2695 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2696 jj = a->j; 2697 for (row=0; row<m; row++) { 2698 mr = a->i[row+1] - a->i[row]; 2699 for (i=0; i<mr; i++) { 2700 col = *jj++; 2701 2702 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2703 } 2704 } 2705 ierr = PetscFree(collengths);CHKERRQ(ierr); 2706 *ia = cia; *ja = cja; 2707 } 2708 PetscFunctionReturn(0); 2709 } 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2713 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2714 { 2715 PetscErrorCode ierr; 2716 2717 PetscFunctionBegin; 2718 if (!ia) PetscFunctionReturn(0); 2719 ierr = PetscFree(*ia);CHKERRQ(ierr); 2720 ierr = PetscFree(*ja);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2726 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2727 { 2728 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2729 PetscErrorCode ierr; 2730 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2731 PetscScalar dx,*y,*xx,*w3_array; 2732 PetscScalar *vscale_array; 2733 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2734 Vec w1 = coloring->w1,w2=coloring->w2,w3; 2735 void *fctx = coloring->fctx; 2736 PetscBool flg = PETSC_FALSE; 2737 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2738 Vec x1_tmp; 2739 2740 PetscFunctionBegin; 2741 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2742 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2743 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2744 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2745 2746 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2747 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2748 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 2749 if (flg) { 2750 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2751 } else { 2752 PetscBool assembled; 2753 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2754 if (assembled) { 2755 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2756 } 2757 } 2758 2759 x1_tmp = x1; 2760 if (!coloring->vscale) { 2761 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2762 } 2763 2764 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2765 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2766 } 2767 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2768 2769 /* Set w1 = F(x1) */ 2770 if (!coloring->fset) { 2771 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2772 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2773 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2774 } else { 2775 coloring->fset = PETSC_FALSE; 2776 } 2777 2778 if (!coloring->w3) { 2779 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2780 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2781 } 2782 w3 = coloring->w3; 2783 2784 CHKMEMQ; 2785 /* Compute all the local scale factors, including ghost points */ 2786 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2787 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2788 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2789 if (ctype == IS_COLORING_GHOSTED) { 2790 col_start = 0; col_end = N; 2791 } else if (ctype == IS_COLORING_GLOBAL) { 2792 xx = xx - start; 2793 vscale_array = vscale_array - start; 2794 col_start = start; col_end = N + start; 2795 } 2796 CHKMEMQ; 2797 for (col=col_start; col<col_end; col++) { 2798 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2799 if (coloring->htype[0] == 'w') { 2800 dx = 1.0 + unorm; 2801 } else { 2802 dx = xx[col]; 2803 } 2804 if (dx == (PetscScalar)0.0) dx = 1.0; 2805 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2806 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2807 dx *= epsilon; 2808 vscale_array[col] = (PetscScalar)1.0/dx; 2809 } CHKMEMQ; 2810 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2811 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2812 if (ctype == IS_COLORING_GLOBAL) { 2813 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2814 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2815 } 2816 CHKMEMQ; 2817 if (coloring->vscaleforrow) { 2818 vscaleforrow = coloring->vscaleforrow; 2819 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2820 2821 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2822 /* 2823 Loop over each color 2824 */ 2825 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2826 for (k=0; k<coloring->ncolors; k++) { 2827 coloring->currentcolor = k; 2828 for (i=0; i<bs; i++) { 2829 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2830 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2831 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2832 /* 2833 Loop over each column associated with color 2834 adding the perturbation to the vector w3. 2835 */ 2836 for (l=0; l<coloring->ncolumns[k]; l++) { 2837 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2838 if (coloring->htype[0] == 'w') { 2839 dx = 1.0 + unorm; 2840 } else { 2841 dx = xx[col]; 2842 } 2843 if (dx == (PetscScalar)0.0) dx = 1.0; 2844 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2845 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2846 dx *= epsilon; 2847 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2848 w3_array[col] += dx; 2849 } 2850 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2851 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2852 2853 /* 2854 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2855 w2 = F(x1 + dx) - F(x1) 2856 */ 2857 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2858 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2859 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2860 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2861 2862 /* 2863 Loop over rows of vector, putting results into Jacobian matrix 2864 */ 2865 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2866 for (l=0; l<coloring->nrows[k]; l++) { 2867 row = bs*coloring->rows[k][l]; /* local row index */ 2868 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2869 for (j=0; j<bs; j++) { 2870 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2871 srows[j] = row + start + j; 2872 } 2873 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2874 } 2875 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2876 } 2877 } /* endof for each color */ 2878 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2879 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2880 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2881 ierr = PetscFree(srows);CHKERRQ(ierr); 2882 2883 coloring->currentcolor = -1; 2884 2885 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2887 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2888 PetscFunctionReturn(0); 2889 } 2890 2891 /* -------------------------------------------------------------------*/ 2892 static struct _MatOps MatOps_Values = { 2893 MatSetValues_SeqBAIJ, 2894 MatGetRow_SeqBAIJ, 2895 MatRestoreRow_SeqBAIJ, 2896 MatMult_SeqBAIJ_N, 2897 /* 4*/ MatMultAdd_SeqBAIJ_N, 2898 MatMultTranspose_SeqBAIJ, 2899 MatMultTransposeAdd_SeqBAIJ, 2900 0, 2901 0, 2902 0, 2903 /* 10*/ 0, 2904 MatLUFactor_SeqBAIJ, 2905 0, 2906 0, 2907 MatTranspose_SeqBAIJ, 2908 /* 15*/ MatGetInfo_SeqBAIJ, 2909 MatEqual_SeqBAIJ, 2910 MatGetDiagonal_SeqBAIJ, 2911 MatDiagonalScale_SeqBAIJ, 2912 MatNorm_SeqBAIJ, 2913 /* 20*/ 0, 2914 MatAssemblyEnd_SeqBAIJ, 2915 MatSetOption_SeqBAIJ, 2916 MatZeroEntries_SeqBAIJ, 2917 /* 24*/ MatZeroRows_SeqBAIJ, 2918 0, 2919 0, 2920 0, 2921 0, 2922 /* 29*/ MatSetUp_SeqBAIJ, 2923 0, 2924 0, 2925 0, 2926 0, 2927 /* 34*/ MatDuplicate_SeqBAIJ, 2928 0, 2929 0, 2930 MatILUFactor_SeqBAIJ, 2931 0, 2932 /* 39*/ MatAXPY_SeqBAIJ, 2933 MatGetSubMatrices_SeqBAIJ, 2934 MatIncreaseOverlap_SeqBAIJ, 2935 MatGetValues_SeqBAIJ, 2936 MatCopy_SeqBAIJ, 2937 /* 44*/ 0, 2938 MatScale_SeqBAIJ, 2939 0, 2940 0, 2941 MatZeroRowsColumns_SeqBAIJ, 2942 /* 49*/ 0, 2943 MatGetRowIJ_SeqBAIJ, 2944 MatRestoreRowIJ_SeqBAIJ, 2945 MatGetColumnIJ_SeqBAIJ, 2946 MatRestoreColumnIJ_SeqBAIJ, 2947 /* 54*/ MatFDColoringCreate_SeqAIJ, 2948 0, 2949 0, 2950 0, 2951 MatSetValuesBlocked_SeqBAIJ, 2952 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2953 MatDestroy_SeqBAIJ, 2954 MatView_SeqBAIJ, 2955 0, 2956 0, 2957 /* 64*/ 0, 2958 0, 2959 0, 2960 0, 2961 0, 2962 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2963 0, 2964 MatConvert_Basic, 2965 0, 2966 0, 2967 /* 74*/ 0, 2968 MatFDColoringApply_BAIJ, 2969 0, 2970 0, 2971 0, 2972 /* 79*/ 0, 2973 0, 2974 0, 2975 0, 2976 MatLoad_SeqBAIJ, 2977 /* 84*/ 0, 2978 0, 2979 0, 2980 0, 2981 0, 2982 /* 89*/ 0, 2983 0, 2984 0, 2985 0, 2986 0, 2987 /* 94*/ 0, 2988 0, 2989 0, 2990 0, 2991 0, 2992 /* 99*/ 0, 2993 0, 2994 0, 2995 0, 2996 0, 2997 /*104*/ 0, 2998 MatRealPart_SeqBAIJ, 2999 MatImaginaryPart_SeqBAIJ, 3000 0, 3001 0, 3002 /*109*/ 0, 3003 0, 3004 0, 3005 0, 3006 MatMissingDiagonal_SeqBAIJ, 3007 /*114*/ 0, 3008 0, 3009 0, 3010 0, 3011 0, 3012 /*119*/ 0, 3013 0, 3014 MatMultHermitianTranspose_SeqBAIJ, 3015 MatMultHermitianTransposeAdd_SeqBAIJ, 3016 0, 3017 /*124*/ 0, 3018 0, 3019 MatInvertBlockDiagonal_SeqBAIJ 3020 }; 3021 3022 EXTERN_C_BEGIN 3023 #undef __FUNCT__ 3024 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3025 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3026 { 3027 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3028 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3029 PetscErrorCode ierr; 3030 3031 PetscFunctionBegin; 3032 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3033 3034 /* allocate space for values if not already there */ 3035 if (!aij->saved_values) { 3036 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3037 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3038 } 3039 3040 /* copy values over */ 3041 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3042 PetscFunctionReturn(0); 3043 } 3044 EXTERN_C_END 3045 3046 EXTERN_C_BEGIN 3047 #undef __FUNCT__ 3048 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3049 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3050 { 3051 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3052 PetscErrorCode ierr; 3053 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3054 3055 PetscFunctionBegin; 3056 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3057 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3058 3059 /* copy values over */ 3060 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3061 PetscFunctionReturn(0); 3062 } 3063 EXTERN_C_END 3064 3065 EXTERN_C_BEGIN 3066 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3067 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3068 EXTERN_C_END 3069 3070 EXTERN_C_BEGIN 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3073 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3074 { 3075 Mat_SeqBAIJ *b; 3076 PetscErrorCode ierr; 3077 PetscInt i,mbs,nbs,bs2; 3078 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3079 3080 PetscFunctionBegin; 3081 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3082 if (nz == MAT_SKIP_ALLOCATION) { 3083 skipallocation = PETSC_TRUE; 3084 nz = 0; 3085 } 3086 3087 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3088 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3089 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3090 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3091 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3092 3093 B->preallocated = PETSC_TRUE; 3094 3095 mbs = B->rmap->n/bs; 3096 nbs = B->cmap->n/bs; 3097 bs2 = bs*bs; 3098 3099 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3100 3101 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3102 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3103 if (nnz) { 3104 for (i=0; i<mbs; i++) { 3105 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3106 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3107 } 3108 } 3109 3110 b = (Mat_SeqBAIJ*)B->data; 3111 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3112 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3113 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3114 3115 if (!flg) { 3116 switch (bs) { 3117 case 1: 3118 B->ops->mult = MatMult_SeqBAIJ_1; 3119 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3120 B->ops->sor = MatSOR_SeqBAIJ_1; 3121 break; 3122 case 2: 3123 B->ops->mult = MatMult_SeqBAIJ_2; 3124 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3125 B->ops->sor = MatSOR_SeqBAIJ_2; 3126 break; 3127 case 3: 3128 B->ops->mult = MatMult_SeqBAIJ_3; 3129 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3130 B->ops->sor = MatSOR_SeqBAIJ_3; 3131 break; 3132 case 4: 3133 B->ops->mult = MatMult_SeqBAIJ_4; 3134 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3135 B->ops->sor = MatSOR_SeqBAIJ_4; 3136 break; 3137 case 5: 3138 B->ops->mult = MatMult_SeqBAIJ_5; 3139 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3140 B->ops->sor = MatSOR_SeqBAIJ_5; 3141 break; 3142 case 6: 3143 B->ops->mult = MatMult_SeqBAIJ_6; 3144 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3145 B->ops->sor = MatSOR_SeqBAIJ_6; 3146 break; 3147 case 7: 3148 B->ops->mult = MatMult_SeqBAIJ_7; 3149 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3150 B->ops->sor = MatSOR_SeqBAIJ_7; 3151 break; 3152 case 15: 3153 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3154 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3155 B->ops->sor = MatSOR_SeqBAIJ_N; 3156 break; 3157 default: 3158 B->ops->mult = MatMult_SeqBAIJ_N; 3159 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3160 B->ops->sor = MatSOR_SeqBAIJ_N; 3161 break; 3162 } 3163 } 3164 b->mbs = mbs; 3165 b->nbs = nbs; 3166 if (!skipallocation) { 3167 if (!b->imax) { 3168 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3169 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3170 3171 b->free_imax_ilen = PETSC_TRUE; 3172 } 3173 /* b->ilen will count nonzeros in each block row so far. */ 3174 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3175 if (!nnz) { 3176 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3177 else if (nz < 0) nz = 1; 3178 for (i=0; i<mbs; i++) b->imax[i] = nz; 3179 nz = nz*mbs; 3180 } else { 3181 nz = 0; 3182 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3183 } 3184 3185 /* allocate the matrix space */ 3186 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3187 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3188 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3189 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3190 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3191 3192 b->singlemalloc = PETSC_TRUE; 3193 b->i[0] = 0; 3194 for (i=1; i<mbs+1; i++) { 3195 b->i[i] = b->i[i-1] + b->imax[i-1]; 3196 } 3197 b->free_a = PETSC_TRUE; 3198 b->free_ij = PETSC_TRUE; 3199 } else { 3200 b->free_a = PETSC_FALSE; 3201 b->free_ij = PETSC_FALSE; 3202 } 3203 3204 b->bs2 = bs2; 3205 b->mbs = mbs; 3206 b->nz = 0; 3207 b->maxnz = nz; 3208 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3209 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3210 PetscFunctionReturn(0); 3211 } 3212 EXTERN_C_END 3213 3214 EXTERN_C_BEGIN 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3217 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3218 { 3219 PetscInt i,m,nz,nz_max=0,*nnz; 3220 PetscScalar *values=0; 3221 PetscErrorCode ierr; 3222 3223 PetscFunctionBegin; 3224 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3225 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3226 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3227 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3228 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3229 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3230 m = B->rmap->n/bs; 3231 3232 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3233 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3234 for (i=0; i<m; i++) { 3235 nz = ii[i+1]- ii[i]; 3236 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3237 nz_max = PetscMax(nz_max, nz); 3238 nnz[i] = nz; 3239 } 3240 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3241 ierr = PetscFree(nnz);CHKERRQ(ierr); 3242 3243 values = (PetscScalar*)V; 3244 if (!values) { 3245 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3246 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3247 } 3248 for (i=0; i<m; i++) { 3249 PetscInt ncols = ii[i+1] - ii[i]; 3250 const PetscInt *icols = jj + ii[i]; 3251 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3252 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3253 } 3254 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3255 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3256 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3257 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3258 PetscFunctionReturn(0); 3259 } 3260 EXTERN_C_END 3261 3262 3263 EXTERN_C_BEGIN 3264 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3265 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3266 #if defined(PETSC_HAVE_MUMPS) 3267 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3268 #endif 3269 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3270 EXTERN_C_END 3271 3272 /*MC 3273 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3274 block sparse compressed row format. 3275 3276 Options Database Keys: 3277 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3278 3279 Level: beginner 3280 3281 .seealso: MatCreateSeqBAIJ() 3282 M*/ 3283 3284 EXTERN_C_BEGIN 3285 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3286 EXTERN_C_END 3287 3288 EXTERN_C_BEGIN 3289 #undef __FUNCT__ 3290 #define __FUNCT__ "MatCreate_SeqBAIJ" 3291 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3292 { 3293 PetscErrorCode ierr; 3294 PetscMPIInt size; 3295 Mat_SeqBAIJ *b; 3296 3297 PetscFunctionBegin; 3298 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3299 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3300 3301 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3302 B->data = (void*)b; 3303 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3304 3305 b->row = 0; 3306 b->col = 0; 3307 b->icol = 0; 3308 b->reallocs = 0; 3309 b->saved_values = 0; 3310 3311 b->roworiented = PETSC_TRUE; 3312 b->nonew = 0; 3313 b->diag = 0; 3314 b->solve_work = 0; 3315 b->mult_work = 0; 3316 B->spptr = 0; 3317 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3318 b->keepnonzeropattern = PETSC_FALSE; 3319 b->xtoy = 0; 3320 b->XtoY = 0; 3321 B->same_nonzero = PETSC_FALSE; 3322 3323 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3324 "MatGetFactorAvailable_seqbaij_petsc", 3325 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3326 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3327 "MatGetFactor_seqbaij_petsc", 3328 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3329 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3330 "MatGetFactor_seqbaij_bstrm", 3331 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3332 #if defined(PETSC_HAVE_MUMPS) 3333 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3334 #endif 3335 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3336 "MatInvertBlockDiagonal_SeqBAIJ", 3337 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3338 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3339 "MatStoreValues_SeqBAIJ", 3340 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3342 "MatRetrieveValues_SeqBAIJ", 3343 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3344 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3345 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3346 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3347 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3348 "MatConvert_SeqBAIJ_SeqAIJ", 3349 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3351 "MatConvert_SeqBAIJ_SeqSBAIJ", 3352 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3353 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3354 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3355 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3356 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3357 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3358 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3359 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3360 "MatConvert_SeqBAIJ_SeqBSTRM", 3361 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3362 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3363 "MatIsTranspose_SeqBAIJ", 3364 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3365 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3366 PetscFunctionReturn(0); 3367 } 3368 EXTERN_C_END 3369 3370 #undef __FUNCT__ 3371 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3372 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3373 { 3374 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3375 PetscErrorCode ierr; 3376 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3377 3378 PetscFunctionBegin; 3379 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3380 3381 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3382 c->imax = a->imax; 3383 c->ilen = a->ilen; 3384 c->free_imax_ilen = PETSC_FALSE; 3385 } else { 3386 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3387 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3388 for (i=0; i<mbs; i++) { 3389 c->imax[i] = a->imax[i]; 3390 c->ilen[i] = a->ilen[i]; 3391 } 3392 c->free_imax_ilen = PETSC_TRUE; 3393 } 3394 3395 /* allocate the matrix space */ 3396 if (mallocmatspace) { 3397 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3398 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3399 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3400 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3401 3402 c->i = a->i; 3403 c->j = a->j; 3404 c->singlemalloc = PETSC_FALSE; 3405 c->free_a = PETSC_TRUE; 3406 c->free_ij = PETSC_FALSE; 3407 c->parent = A; 3408 C->preallocated = PETSC_TRUE; 3409 C->assembled = PETSC_TRUE; 3410 3411 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3412 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3413 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3414 } else { 3415 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3416 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3417 3418 c->singlemalloc = PETSC_TRUE; 3419 c->free_a = PETSC_TRUE; 3420 c->free_ij = PETSC_TRUE; 3421 3422 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3423 if (mbs > 0) { 3424 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3425 if (cpvalues == MAT_COPY_VALUES) { 3426 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3427 } else { 3428 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3429 } 3430 } 3431 C->preallocated = PETSC_TRUE; 3432 C->assembled = PETSC_TRUE; 3433 } 3434 } 3435 3436 c->roworiented = a->roworiented; 3437 c->nonew = a->nonew; 3438 3439 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3440 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3441 3442 c->bs2 = a->bs2; 3443 c->mbs = a->mbs; 3444 c->nbs = a->nbs; 3445 3446 if (a->diag) { 3447 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3448 c->diag = a->diag; 3449 c->free_diag = PETSC_FALSE; 3450 } else { 3451 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3452 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3453 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3454 c->free_diag = PETSC_TRUE; 3455 } 3456 } else c->diag = 0; 3457 3458 c->nz = a->nz; 3459 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3460 c->solve_work = 0; 3461 c->mult_work = 0; 3462 3463 c->compressedrow.use = a->compressedrow.use; 3464 c->compressedrow.nrows = a->compressedrow.nrows; 3465 c->compressedrow.check = a->compressedrow.check; 3466 if (a->compressedrow.use) { 3467 i = a->compressedrow.nrows; 3468 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3469 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3470 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3471 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3472 } else { 3473 c->compressedrow.use = PETSC_FALSE; 3474 c->compressedrow.i = NULL; 3475 c->compressedrow.rindex = NULL; 3476 } 3477 C->same_nonzero = A->same_nonzero; 3478 3479 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3480 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3481 PetscFunctionReturn(0); 3482 } 3483 3484 #undef __FUNCT__ 3485 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3486 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3487 { 3488 PetscErrorCode ierr; 3489 3490 PetscFunctionBegin; 3491 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3492 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3493 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3494 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3495 PetscFunctionReturn(0); 3496 } 3497 3498 #undef __FUNCT__ 3499 #define __FUNCT__ "MatLoad_SeqBAIJ" 3500 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3501 { 3502 Mat_SeqBAIJ *a; 3503 PetscErrorCode ierr; 3504 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3505 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3506 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3507 PetscInt *masked,nmask,tmp,bs2,ishift; 3508 PetscMPIInt size; 3509 int fd; 3510 PetscScalar *aa; 3511 MPI_Comm comm; 3512 3513 PetscFunctionBegin; 3514 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3515 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3516 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3517 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3518 bs2 = bs*bs; 3519 3520 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3521 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3522 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3523 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3524 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3525 M = header[1]; N = header[2]; nz = header[3]; 3526 3527 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3528 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3529 3530 /* 3531 This code adds extra rows to make sure the number of rows is 3532 divisible by the blocksize 3533 */ 3534 mbs = M/bs; 3535 extra_rows = bs - M + bs*(mbs); 3536 if (extra_rows == bs) extra_rows = 0; 3537 else mbs++; 3538 if (extra_rows) { 3539 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3540 } 3541 3542 /* Set global sizes if not already set */ 3543 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3544 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3545 } else { /* Check if the matrix global sizes are correct */ 3546 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3547 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3548 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3549 } 3550 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3551 } 3552 3553 /* read in row lengths */ 3554 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3555 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3556 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3557 3558 /* read in column indices */ 3559 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3560 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3561 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3562 3563 /* loop over row lengths determining block row lengths */ 3564 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3565 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3566 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3567 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3568 rowcount = 0; 3569 nzcount = 0; 3570 for (i=0; i<mbs; i++) { 3571 nmask = 0; 3572 for (j=0; j<bs; j++) { 3573 kmax = rowlengths[rowcount]; 3574 for (k=0; k<kmax; k++) { 3575 tmp = jj[nzcount++]/bs; 3576 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3577 } 3578 rowcount++; 3579 } 3580 browlengths[i] += nmask; 3581 /* zero out the mask elements we set */ 3582 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3583 } 3584 3585 /* Do preallocation */ 3586 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3587 a = (Mat_SeqBAIJ*)newmat->data; 3588 3589 /* set matrix "i" values */ 3590 a->i[0] = 0; 3591 for (i=1; i<= mbs; i++) { 3592 a->i[i] = a->i[i-1] + browlengths[i-1]; 3593 a->ilen[i-1] = browlengths[i-1]; 3594 } 3595 a->nz = 0; 3596 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3597 3598 /* read in nonzero values */ 3599 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3600 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3601 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3602 3603 /* set "a" and "j" values into matrix */ 3604 nzcount = 0; jcount = 0; 3605 for (i=0; i<mbs; i++) { 3606 nzcountb = nzcount; 3607 nmask = 0; 3608 for (j=0; j<bs; j++) { 3609 kmax = rowlengths[i*bs+j]; 3610 for (k=0; k<kmax; k++) { 3611 tmp = jj[nzcount++]/bs; 3612 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3613 } 3614 } 3615 /* sort the masked values */ 3616 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3617 3618 /* set "j" values into matrix */ 3619 maskcount = 1; 3620 for (j=0; j<nmask; j++) { 3621 a->j[jcount++] = masked[j]; 3622 mask[masked[j]] = maskcount++; 3623 } 3624 /* set "a" values into matrix */ 3625 ishift = bs2*a->i[i]; 3626 for (j=0; j<bs; j++) { 3627 kmax = rowlengths[i*bs+j]; 3628 for (k=0; k<kmax; k++) { 3629 tmp = jj[nzcountb]/bs; 3630 block = mask[tmp] - 1; 3631 point = jj[nzcountb] - bs*tmp; 3632 idx = ishift + bs2*block + j + bs*point; 3633 a->a[idx] = (MatScalar)aa[nzcountb++]; 3634 } 3635 } 3636 /* zero out the mask elements we set */ 3637 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3638 } 3639 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3640 3641 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3642 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3643 ierr = PetscFree(aa);CHKERRQ(ierr); 3644 ierr = PetscFree(jj);CHKERRQ(ierr); 3645 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3646 3647 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3649 PetscFunctionReturn(0); 3650 } 3651 3652 #undef __FUNCT__ 3653 #define __FUNCT__ "MatCreateSeqBAIJ" 3654 /*@C 3655 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3656 compressed row) format. For good matrix assembly performance the 3657 user should preallocate the matrix storage by setting the parameter nz 3658 (or the array nnz). By setting these parameters accurately, performance 3659 during matrix assembly can be increased by more than a factor of 50. 3660 3661 Collective on MPI_Comm 3662 3663 Input Parameters: 3664 + comm - MPI communicator, set to PETSC_COMM_SELF 3665 . bs - size of block 3666 . m - number of rows 3667 . n - number of columns 3668 . nz - number of nonzero blocks per block row (same for all rows) 3669 - nnz - array containing the number of nonzero blocks in the various block rows 3670 (possibly different for each block row) or NULL 3671 3672 Output Parameter: 3673 . A - the matrix 3674 3675 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3676 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3677 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3678 3679 Options Database Keys: 3680 . -mat_no_unroll - uses code that does not unroll the loops in the 3681 block calculations (much slower) 3682 . -mat_block_size - size of the blocks to use 3683 3684 Level: intermediate 3685 3686 Notes: 3687 The number of rows and columns must be divisible by blocksize. 3688 3689 If the nnz parameter is given then the nz parameter is ignored 3690 3691 A nonzero block is any block that as 1 or more nonzeros in it 3692 3693 The block AIJ format is fully compatible with standard Fortran 77 3694 storage. That is, the stored row and column indices can begin at 3695 either one (as in Fortran) or zero. See the users' manual for details. 3696 3697 Specify the preallocated storage with either nz or nnz (not both). 3698 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3699 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3700 matrices. 3701 3702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3703 @*/ 3704 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3705 { 3706 PetscErrorCode ierr; 3707 3708 PetscFunctionBegin; 3709 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3710 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3711 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3712 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3713 PetscFunctionReturn(0); 3714 } 3715 3716 #undef __FUNCT__ 3717 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3718 /*@C 3719 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3720 per row in the matrix. For good matrix assembly performance the 3721 user should preallocate the matrix storage by setting the parameter nz 3722 (or the array nnz). By setting these parameters accurately, performance 3723 during matrix assembly can be increased by more than a factor of 50. 3724 3725 Collective on MPI_Comm 3726 3727 Input Parameters: 3728 + A - the matrix 3729 . bs - size of block 3730 . nz - number of block nonzeros per block row (same for all rows) 3731 - nnz - array containing the number of block nonzeros in the various block rows 3732 (possibly different for each block row) or NULL 3733 3734 Options Database Keys: 3735 . -mat_no_unroll - uses code that does not unroll the loops in the 3736 block calculations (much slower) 3737 . -mat_block_size - size of the blocks to use 3738 3739 Level: intermediate 3740 3741 Notes: 3742 If the nnz parameter is given then the nz parameter is ignored 3743 3744 You can call MatGetInfo() to get information on how effective the preallocation was; 3745 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3746 You can also run with the option -info and look for messages with the string 3747 malloc in them to see if additional memory allocation was needed. 3748 3749 The block AIJ format is fully compatible with standard Fortran 77 3750 storage. That is, the stored row and column indices can begin at 3751 either one (as in Fortran) or zero. See the users' manual for details. 3752 3753 Specify the preallocated storage with either nz or nnz (not both). 3754 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3755 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3756 3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3758 @*/ 3759 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3760 { 3761 PetscErrorCode ierr; 3762 3763 PetscFunctionBegin; 3764 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3765 PetscValidType(B,1); 3766 PetscValidLogicalCollectiveInt(B,bs,2); 3767 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3768 PetscFunctionReturn(0); 3769 } 3770 3771 #undef __FUNCT__ 3772 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3773 /*@C 3774 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3775 (the default sequential PETSc format). 3776 3777 Collective on MPI_Comm 3778 3779 Input Parameters: 3780 + A - the matrix 3781 . i - the indices into j for the start of each local row (starts with zero) 3782 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3783 - v - optional values in the matrix 3784 3785 Level: developer 3786 3787 .keywords: matrix, aij, compressed row, sparse 3788 3789 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3790 @*/ 3791 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3792 { 3793 PetscErrorCode ierr; 3794 3795 PetscFunctionBegin; 3796 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3797 PetscValidType(B,1); 3798 PetscValidLogicalCollectiveInt(B,bs,2); 3799 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3800 PetscFunctionReturn(0); 3801 } 3802 3803 3804 #undef __FUNCT__ 3805 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3806 /*@ 3807 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3808 3809 Collective on MPI_Comm 3810 3811 Input Parameters: 3812 + comm - must be an MPI communicator of size 1 3813 . bs - size of block 3814 . m - number of rows 3815 . n - number of columns 3816 . i - row indices 3817 . j - column indices 3818 - a - matrix values 3819 3820 Output Parameter: 3821 . mat - the matrix 3822 3823 Level: advanced 3824 3825 Notes: 3826 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3827 once the matrix is destroyed 3828 3829 You cannot set new nonzero locations into this matrix, that will generate an error. 3830 3831 The i and j indices are 0 based 3832 3833 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3834 3835 3836 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3837 3838 @*/ 3839 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3840 { 3841 PetscErrorCode ierr; 3842 PetscInt ii; 3843 Mat_SeqBAIJ *baij; 3844 3845 PetscFunctionBegin; 3846 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3847 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3848 3849 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3850 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3851 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3852 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3853 baij = (Mat_SeqBAIJ*)(*mat)->data; 3854 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3855 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3856 3857 baij->i = i; 3858 baij->j = j; 3859 baij->a = a; 3860 3861 baij->singlemalloc = PETSC_FALSE; 3862 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3863 baij->free_a = PETSC_FALSE; 3864 baij->free_ij = PETSC_FALSE; 3865 3866 for (ii=0; ii<m; ii++) { 3867 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3868 #if defined(PETSC_USE_DEBUG) 3869 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3870 #endif 3871 } 3872 #if defined(PETSC_USE_DEBUG) 3873 for (ii=0; ii<baij->i[m]; ii++) { 3874 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3875 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3876 } 3877 #endif 3878 3879 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3881 PetscFunctionReturn(0); 3882 } 3883