1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 166 x[i2] = idiag[0]*s1; 167 idiag += 1; 168 i2 += 1; 169 } 170 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 171 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 172 } 173 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 174 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 175 i2 = 0; 176 mdiag = a->idiag+a->mbs; 177 for (i=0; i<m; i++) { 178 x1 = x[i2]; 179 x[i2] = mdiag[0]*x1; 180 mdiag += 1; 181 i2 += 1; 182 } 183 ierr = PetscLogFlops(m);CHKERRQ(ierr); 184 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 185 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 186 } 187 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 188 idiag = a->idiag+a->mbs - 1; 189 i2 = m - 1; 190 x1 = x[i2]; 191 x[i2] = idiag[0]*x1; 192 idiag -= 1; 193 i2 -= 1; 194 for (i=m-2; i>=0; i--) { 195 v = aa + (diag[i]+1); 196 vi = aj + diag[i] + 1; 197 nz = ai[i+1] - diag[i] - 1; 198 s1 = x[i2]; 199 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 200 x[i2] = idiag[0]*s1; 201 idiag -= 1; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 215 { 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 217 PetscScalar *x,x1,x2,s1,s2; 218 const PetscScalar *b; 219 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 220 PetscErrorCode ierr; 221 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 222 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 223 224 PetscFunctionBegin; 225 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 226 its = its*lits; 227 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 228 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 229 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 230 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 231 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 232 233 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 234 235 diag = a->diag; 236 idiag = a->idiag; 237 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 238 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 239 240 if (flag & SOR_ZERO_INITIAL_GUESS) { 241 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 242 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 243 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 244 i2 = 2; 245 idiag += 4; 246 for (i=1; i<m; i++) { 247 v = aa + 4*ai[i]; 248 vi = aj + ai[i]; 249 nz = diag[i] - ai[i]; 250 s1 = b[i2]; s2 = b[i2+1]; 251 for (j=0; j<nz; j++) { 252 idx = 2*vi[j]; 253 it = 4*j; 254 x1 = x[idx]; x2 = x[1+idx]; 255 s1 -= v[it]*x1 + v[it+2]*x2; 256 s2 -= v[it+1]*x1 + v[it+3]*x2; 257 } 258 x[i2] = idiag[0]*s1 + idiag[2]*s2; 259 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 260 idiag += 4; 261 i2 += 2; 262 } 263 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 264 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 265 } 266 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 267 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 268 i2 = 0; 269 mdiag = a->idiag+4*a->mbs; 270 for (i=0; i<m; i++) { 271 x1 = x[i2]; x2 = x[i2+1]; 272 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 273 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 274 mdiag += 4; 275 i2 += 2; 276 } 277 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 278 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 279 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 280 } 281 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 282 idiag = a->idiag+4*a->mbs - 4; 283 i2 = 2*m - 2; 284 x1 = x[i2]; x2 = x[i2+1]; 285 x[i2] = idiag[0]*x1 + idiag[2]*x2; 286 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 287 idiag -= 4; 288 i2 -= 2; 289 for (i=m-2; i>=0; i--) { 290 v = aa + 4*(diag[i]+1); 291 vi = aj + diag[i] + 1; 292 nz = ai[i+1] - diag[i] - 1; 293 s1 = x[i2]; s2 = x[i2+1]; 294 for (j=0; j<nz; j++) { 295 idx = 2*vi[j]; 296 it = 4*j; 297 x1 = x[idx]; x2 = x[1+idx]; 298 s1 -= v[it]*x1 + v[it+2]*x2; 299 s2 -= v[it+1]*x1 + v[it+3]*x2; 300 } 301 x[i2] = idiag[0]*s1 + idiag[2]*s2; 302 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 303 idiag -= 4; 304 i2 -= 2; 305 } 306 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 307 } 308 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 309 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 310 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 317 { 318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 319 PetscScalar *x,x1,x2,x3,s1,s2,s3; 320 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 321 const PetscScalar *b; 322 PetscErrorCode ierr; 323 PetscInt m = a->mbs,i,i2,nz,idx; 324 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 325 326 PetscFunctionBegin; 327 its = its*lits; 328 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 329 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 330 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 331 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 332 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 333 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 334 335 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 336 337 diag = a->diag; 338 idiag = a->idiag; 339 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 341 342 if (flag & SOR_ZERO_INITIAL_GUESS) { 343 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 344 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 345 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 346 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 347 i2 = 3; 348 idiag += 9; 349 for (i=1; i<m; i++) { 350 v = aa + 9*ai[i]; 351 vi = aj + ai[i]; 352 nz = diag[i] - ai[i]; 353 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 354 while (nz--) { 355 idx = 3*(*vi++); 356 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 357 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 358 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 359 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 360 v += 9; 361 } 362 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 363 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 364 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 365 idiag += 9; 366 i2 += 3; 367 } 368 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 369 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 370 } 371 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 372 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 373 i2 = 0; 374 mdiag = a->idiag+9*a->mbs; 375 for (i=0; i<m; i++) { 376 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 377 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 378 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 379 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 380 mdiag += 9; 381 i2 += 3; 382 } 383 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 384 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 385 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 386 } 387 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 388 idiag = a->idiag+9*a->mbs - 9; 389 i2 = 3*m - 3; 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 391 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 392 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 393 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 394 idiag -= 9; 395 i2 -= 3; 396 for (i=m-2; i>=0; i--) { 397 v = aa + 9*(diag[i]+1); 398 vi = aj + diag[i] + 1; 399 nz = ai[i+1] - diag[i] - 1; 400 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 401 while (nz--) { 402 idx = 3*(*vi++); 403 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 404 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 405 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 406 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 407 v += 9; 408 } 409 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 410 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 411 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 412 idiag -= 9; 413 i2 -= 3; 414 } 415 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 416 } 417 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 418 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 428 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 429 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 430 const PetscScalar *b; 431 PetscErrorCode ierr; 432 PetscInt m = a->mbs,i,i2,nz,idx; 433 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 434 435 PetscFunctionBegin; 436 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 437 its = its*lits; 438 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 439 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 440 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 441 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 442 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 443 444 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 445 446 diag = a->diag; 447 idiag = a->idiag; 448 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 449 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 450 451 if (flag & SOR_ZERO_INITIAL_GUESS) { 452 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 453 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 454 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 455 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 456 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 457 i2 = 4; 458 idiag += 16; 459 for (i=1; i<m; i++) { 460 v = aa + 16*ai[i]; 461 vi = aj + ai[i]; 462 nz = diag[i] - ai[i]; 463 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 464 while (nz--) { 465 idx = 4*(*vi++); 466 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 467 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 468 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 469 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 470 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 471 v += 16; 472 } 473 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 474 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 475 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 476 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 477 idiag += 16; 478 i2 += 4; 479 } 480 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 481 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 482 } 483 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 484 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 485 i2 = 0; 486 mdiag = a->idiag+16*a->mbs; 487 for (i=0; i<m; i++) { 488 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 489 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 490 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 491 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 492 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 493 mdiag += 16; 494 i2 += 4; 495 } 496 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 497 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 498 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 499 } 500 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 501 idiag = a->idiag+16*a->mbs - 16; 502 i2 = 4*m - 4; 503 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 504 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 505 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 506 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 507 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 508 idiag -= 16; 509 i2 -= 4; 510 for (i=m-2; i>=0; i--) { 511 v = aa + 16*(diag[i]+1); 512 vi = aj + diag[i] + 1; 513 nz = ai[i+1] - diag[i] - 1; 514 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 515 while (nz--) { 516 idx = 4*(*vi++); 517 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 518 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 519 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 520 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 521 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 522 v += 16; 523 } 524 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 525 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 526 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 527 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 528 idiag -= 16; 529 i2 -= 4; 530 } 531 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 532 } 533 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 534 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 535 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 545 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 546 const PetscScalar *b; 547 PetscErrorCode ierr; 548 PetscInt m = a->mbs,i,i2,nz,idx; 549 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 550 551 PetscFunctionBegin; 552 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 553 its = its*lits; 554 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 555 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 556 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 557 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 558 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 559 560 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 561 562 diag = a->diag; 563 idiag = a->idiag; 564 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 565 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 566 567 if (flag & SOR_ZERO_INITIAL_GUESS) { 568 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 569 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 570 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 571 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 572 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 573 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 574 i2 = 5; 575 idiag += 25; 576 for (i=1; i<m; i++) { 577 v = aa + 25*ai[i]; 578 vi = aj + ai[i]; 579 nz = diag[i] - ai[i]; 580 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 581 while (nz--) { 582 idx = 5*(*vi++); 583 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 584 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 585 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 586 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 587 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 588 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 589 v += 25; 590 } 591 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 592 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 593 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 594 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 595 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 596 idiag += 25; 597 i2 += 5; 598 } 599 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 600 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 601 } 602 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 603 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 604 i2 = 0; 605 mdiag = a->idiag+25*a->mbs; 606 for (i=0; i<m; i++) { 607 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 608 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 609 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 610 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 611 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 612 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 613 mdiag += 25; 614 i2 += 5; 615 } 616 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 617 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 618 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 619 } 620 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 621 idiag = a->idiag+25*a->mbs - 25; 622 i2 = 5*m - 5; 623 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 624 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 625 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 626 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 627 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 628 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 629 idiag -= 25; 630 i2 -= 5; 631 for (i=m-2; i>=0; i--) { 632 v = aa + 25*(diag[i]+1); 633 vi = aj + diag[i] + 1; 634 nz = ai[i+1] - diag[i] - 1; 635 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 636 while (nz--) { 637 idx = 5*(*vi++); 638 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 639 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 640 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 641 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 642 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 643 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 644 v += 25; 645 } 646 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 647 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 648 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 649 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 650 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 651 idiag -= 25; 652 i2 -= 5; 653 } 654 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 655 } 656 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 665 { 666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 667 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 668 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 669 const PetscScalar *b; 670 PetscErrorCode ierr; 671 PetscInt m = a->mbs,i,i2,nz,idx; 672 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 673 674 PetscFunctionBegin; 675 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 676 its = its*lits; 677 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 678 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 679 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 680 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 681 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 682 683 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 684 685 diag = a->diag; 686 idiag = a->idiag; 687 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 688 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 689 690 if (flag & SOR_ZERO_INITIAL_GUESS) { 691 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 692 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 693 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 694 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 695 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 696 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 697 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 698 i2 = 6; 699 idiag += 36; 700 for (i=1; i<m; i++) { 701 v = aa + 36*ai[i]; 702 vi = aj + ai[i]; 703 nz = diag[i] - ai[i]; 704 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 705 while (nz--) { 706 idx = 6*(*vi++); 707 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 708 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 709 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 710 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 711 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 712 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 713 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 714 v += 36; 715 } 716 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 717 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 718 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 719 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 720 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 721 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 722 idiag += 36; 723 i2 += 6; 724 } 725 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 726 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 727 } 728 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 729 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 730 i2 = 0; 731 mdiag = a->idiag+36*a->mbs; 732 for (i=0; i<m; i++) { 733 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 734 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 735 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 736 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 737 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 738 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 739 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 740 mdiag += 36; 741 i2 += 6; 742 } 743 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 744 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 745 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 746 } 747 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 748 idiag = a->idiag+36*a->mbs - 36; 749 i2 = 6*m - 6; 750 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 751 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 752 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 753 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 754 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 755 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 756 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 757 idiag -= 36; 758 i2 -= 6; 759 for (i=m-2; i>=0; i--) { 760 v = aa + 36*(diag[i]+1); 761 vi = aj + diag[i] + 1; 762 nz = ai[i+1] - diag[i] - 1; 763 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 764 while (nz--) { 765 idx = 6*(*vi++); 766 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 767 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 768 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 769 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 770 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 771 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 772 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 773 v += 36; 774 } 775 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 776 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 777 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 778 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 779 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 780 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 781 idiag -= 36; 782 i2 -= 6; 783 } 784 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 785 } 786 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 787 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 788 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 795 { 796 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 797 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 798 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 799 const PetscScalar *b; 800 PetscErrorCode ierr; 801 PetscInt m = a->mbs,i,i2,nz,idx; 802 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 803 804 PetscFunctionBegin; 805 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 806 its = its*lits; 807 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 808 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 809 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 810 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 811 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 812 813 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 814 815 diag = a->diag; 816 idiag = a->idiag; 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 819 820 if (flag & SOR_ZERO_INITIAL_GUESS) { 821 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 822 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 823 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 824 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 825 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 826 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 827 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 828 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 829 i2 = 7; 830 idiag += 49; 831 for (i=1; i<m; i++) { 832 v = aa + 49*ai[i]; 833 vi = aj + ai[i]; 834 nz = diag[i] - ai[i]; 835 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 836 while (nz--) { 837 idx = 7*(*vi++); 838 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 839 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 840 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 841 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 842 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 843 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 844 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 845 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 846 v += 49; 847 } 848 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 849 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 850 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 851 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 852 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 853 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 854 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 855 idiag += 49; 856 i2 += 7; 857 } 858 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 859 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 860 } 861 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 862 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 863 i2 = 0; 864 mdiag = a->idiag+49*a->mbs; 865 for (i=0; i<m; i++) { 866 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 867 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 868 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 869 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 870 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 871 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 872 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 873 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 874 mdiag += 49; 875 i2 += 7; 876 } 877 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 878 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 879 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 882 idiag = a->idiag+49*a->mbs - 49; 883 i2 = 7*m - 7; 884 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 885 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 886 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 887 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 888 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 889 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 890 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 891 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 892 idiag -= 49; 893 i2 -= 7; 894 for (i=m-2; i>=0; i--) { 895 v = aa + 49*(diag[i]+1); 896 vi = aj + diag[i] + 1; 897 nz = ai[i+1] - diag[i] - 1; 898 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 899 while (nz--) { 900 idx = 7*(*vi++); 901 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 902 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 903 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 904 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 905 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 906 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 907 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 908 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 909 v += 49; 910 } 911 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 912 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 913 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 914 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 915 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 916 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 917 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 918 idiag -= 49; 919 i2 -= 7; 920 } 921 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 922 } 923 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 924 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 925 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 932 { 933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 934 PetscScalar *x,*work,*w,*workt; 935 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 936 const PetscScalar *b; 937 PetscErrorCode ierr; 938 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 939 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 940 941 PetscFunctionBegin; 942 its = its*lits; 943 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 944 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 945 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 946 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 947 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 948 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 949 950 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 951 952 diag = a->diag; 953 idiag = a->idiag; 954 if (!a->mult_work) { 955 k = PetscMax(A->rmap->n,A->cmap->n); 956 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 957 } 958 work = a->mult_work; 959 if (!a->sor_work) { 960 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 961 } 962 w = a->sor_work; 963 964 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 966 967 if (flag & SOR_ZERO_INITIAL_GUESS) { 968 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 969 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 970 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 971 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 972 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 973 i2 = bs; 974 idiag += bs2; 975 for (i=1; i<m; i++) { 976 v = aa + bs2*ai[i]; 977 vi = aj + ai[i]; 978 nz = diag[i] - ai[i]; 979 980 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 981 /* copy all rows of x that are needed into contiguous space */ 982 workt = work; 983 for (j=0; j<nz; j++) { 984 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 985 workt += bs; 986 } 987 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 988 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 989 while (nz--) { 990 idx = N*(*vi++); 991 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 992 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 993 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 994 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 995 v += N2; 996 } */ 997 998 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 999 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1000 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1001 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1002 1003 idiag += bs2; 1004 i2 += bs; 1005 } 1006 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1007 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1008 } 1009 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1010 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1011 i2 = 0; 1012 mdiag = a->idiag+bs2*a->mbs; 1013 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1014 for (i=0; i<m; i++) { 1015 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1016 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1017 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1018 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1019 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1020 1021 mdiag += bs2; 1022 i2 += bs; 1023 } 1024 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1025 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1026 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1027 } 1028 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1029 idiag = a->idiag+bs2*a->mbs - bs2; 1030 i2 = bs*m - bs; 1031 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1033 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1034 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1035 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1036 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1037 idiag -= bs2; 1038 i2 -= bs; 1039 for (i=m-2; i>=0; i--) { 1040 v = aa + bs2*(diag[i]+1); 1041 vi = aj + diag[i] + 1; 1042 nz = ai[i+1] - diag[i] - 1; 1043 1044 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1045 /* copy all rows of x that are needed into contiguous space */ 1046 workt = work; 1047 for (j=0; j<nz; j++) { 1048 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1049 workt += bs; 1050 } 1051 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1052 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1053 while (nz--) { 1054 idx = N*(*vi++); 1055 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1056 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1057 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1058 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1059 v += N2; 1060 } */ 1061 1062 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1063 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1064 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1065 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1066 idiag -= bs2; 1067 i2 -= bs; 1068 } 1069 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1070 } 1071 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 /* 1078 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1079 */ 1080 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1083 #define matsetvaluesblocked4_ matsetvaluesblocked4 1084 #endif 1085 1086 EXTERN_C_BEGIN 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "matsetvaluesblocked4_" 1089 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1090 { 1091 Mat A = *AA; 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1094 PetscInt *ai =a->i,*ailen=a->ilen; 1095 PetscInt *aj =a->j,stepval,lastcol = -1; 1096 const PetscScalar *value = v; 1097 MatScalar *ap,*aa = a->a,*bap; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1101 stepval = (n-1)*4; 1102 for (k=0; k<m; k++) { /* loop over added rows */ 1103 row = im[k]; 1104 rp = aj + ai[row]; 1105 ap = aa + 16*ai[row]; 1106 nrow = ailen[row]; 1107 low = 0; 1108 high = nrow; 1109 for (l=0; l<n; l++) { /* loop over added columns */ 1110 col = in[l]; 1111 if (col <= lastcol) low = 0; 1112 else high = nrow; 1113 lastcol = col; 1114 value = v + k*(stepval+4 + l)*4; 1115 while (high-low > 7) { 1116 t = (low+high)/2; 1117 if (rp[t] > col) high = t; 1118 else low = t; 1119 } 1120 for (i=low; i<high; i++) { 1121 if (rp[i] > col) break; 1122 if (rp[i] == col) { 1123 bap = ap + 16*i; 1124 for (ii=0; ii<4; ii++,value+=stepval) { 1125 for (jj=ii; jj<16; jj+=4) { 1126 bap[jj] += *value++; 1127 } 1128 } 1129 goto noinsert2; 1130 } 1131 } 1132 N = nrow++ - 1; 1133 high++; /* added new column index thus must search to one higher than before */ 1134 /* shift up all the later entries in this row */ 1135 for (ii=N; ii>=i; ii--) { 1136 rp[ii+1] = rp[ii]; 1137 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1138 } 1139 if (N >= i) { 1140 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1141 } 1142 rp[i] = col; 1143 bap = ap + 16*i; 1144 for (ii=0; ii<4; ii++,value+=stepval) { 1145 for (jj=ii; jj<16; jj+=4) { 1146 bap[jj] = *value++; 1147 } 1148 } 1149 noinsert2:; 1150 low = i; 1151 } 1152 ailen[row] = nrow; 1153 } 1154 PetscFunctionReturnVoid(); 1155 } 1156 EXTERN_C_END 1157 1158 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1159 #define matsetvalues4_ MATSETVALUES4 1160 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1161 #define matsetvalues4_ matsetvalues4 1162 #endif 1163 1164 EXTERN_C_BEGIN 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatSetValues4_" 1167 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1168 { 1169 Mat A = *AA; 1170 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1171 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1172 PetscInt *ai=a->i,*ailen=a->ilen; 1173 PetscInt *aj=a->j,brow,bcol; 1174 PetscInt ridx,cidx,lastcol = -1; 1175 MatScalar *ap,value,*aa=a->a,*bap; 1176 1177 PetscFunctionBegin; 1178 for (k=0; k<m; k++) { /* loop over added rows */ 1179 row = im[k]; brow = row/4; 1180 rp = aj + ai[brow]; 1181 ap = aa + 16*ai[brow]; 1182 nrow = ailen[brow]; 1183 low = 0; 1184 high = nrow; 1185 for (l=0; l<n; l++) { /* loop over added columns */ 1186 col = in[l]; bcol = col/4; 1187 ridx = row % 4; cidx = col % 4; 1188 value = v[l + k*n]; 1189 if (col <= lastcol) low = 0; 1190 else high = nrow; 1191 lastcol = col; 1192 while (high-low > 7) { 1193 t = (low+high)/2; 1194 if (rp[t] > bcol) high = t; 1195 else low = t; 1196 } 1197 for (i=low; i<high; i++) { 1198 if (rp[i] > bcol) break; 1199 if (rp[i] == bcol) { 1200 bap = ap + 16*i + 4*cidx + ridx; 1201 *bap += value; 1202 goto noinsert1; 1203 } 1204 } 1205 N = nrow++ - 1; 1206 high++; /* added new column thus must search to one higher than before */ 1207 /* shift up all the later entries in this row */ 1208 for (ii=N; ii>=i; ii--) { 1209 rp[ii+1] = rp[ii]; 1210 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1211 } 1212 if (N>=i) { 1213 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1214 } 1215 rp[i] = bcol; 1216 ap[16*i + 4*cidx + ridx] = value; 1217 noinsert1:; 1218 low = i; 1219 } 1220 ailen[brow] = nrow; 1221 } 1222 PetscFunctionReturnVoid(); 1223 } 1224 EXTERN_C_END 1225 1226 /* 1227 Checks for missing diagonals 1228 */ 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1231 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1232 { 1233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1234 PetscErrorCode ierr; 1235 PetscInt *diag,*jj = a->j,i; 1236 1237 PetscFunctionBegin; 1238 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1239 *missing = PETSC_FALSE; 1240 if (A->rmap->n > 0 && !jj) { 1241 *missing = PETSC_TRUE; 1242 if (d) *d = 0; 1243 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1244 } else { 1245 diag = a->diag; 1246 for (i=0; i<a->mbs; i++) { 1247 if (jj[diag[i]] != i) { 1248 *missing = PETSC_TRUE; 1249 if (d) *d = i; 1250 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1251 break; 1252 } 1253 } 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1260 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1261 { 1262 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1263 PetscErrorCode ierr; 1264 PetscInt i,j,m = a->mbs; 1265 1266 PetscFunctionBegin; 1267 if (!a->diag) { 1268 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1269 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1270 a->free_diag = PETSC_TRUE; 1271 } 1272 for (i=0; i<m; i++) { 1273 a->diag[i] = a->i[i+1]; 1274 for (j=a->i[i]; j<a->i[i+1]; j++) { 1275 if (a->j[j] == i) { 1276 a->diag[i] = j; 1277 break; 1278 } 1279 } 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1287 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1288 { 1289 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1290 PetscErrorCode ierr; 1291 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1292 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1293 1294 PetscFunctionBegin; 1295 *nn = n; 1296 if (!ia) PetscFunctionReturn(0); 1297 if (symmetric) { 1298 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1299 nz = tia[n]; 1300 } else { 1301 tia = a->i; tja = a->j; 1302 } 1303 1304 if (!blockcompressed && bs > 1) { 1305 (*nn) *= bs; 1306 /* malloc & create the natural set of indices */ 1307 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1308 if (n) { 1309 (*ia)[0] = 0; 1310 for (j=1; j<bs; j++) { 1311 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1312 } 1313 } 1314 1315 for (i=1; i<n; i++) { 1316 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1317 for (j=1; j<bs; j++) { 1318 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1319 } 1320 } 1321 if (n) { 1322 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1323 } 1324 1325 if (inja) { 1326 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1327 cnt = 0; 1328 for (i=0; i<n; i++) { 1329 for (j=0; j<bs; j++) { 1330 for (k=tia[i]; k<tia[i+1]; k++) { 1331 for (l=0; l<bs; l++) { 1332 (*ja)[cnt++] = bs*tja[k] + l; 1333 } 1334 } 1335 } 1336 } 1337 } 1338 1339 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1340 ierr = PetscFree(tia);CHKERRQ(ierr); 1341 ierr = PetscFree(tja);CHKERRQ(ierr); 1342 } 1343 } else if (oshift == 1) { 1344 if (symmetric) { 1345 nz = tia[A->rmap->n/bs]; 1346 /* add 1 to i and j indices */ 1347 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1348 *ia = tia; 1349 if (ja) { 1350 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1351 *ja = tja; 1352 } 1353 } else { 1354 nz = a->i[A->rmap->n/bs]; 1355 /* malloc space and add 1 to i and j indices */ 1356 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1357 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1358 if (ja) { 1359 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1360 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1361 } 1362 } 1363 } else { 1364 *ia = tia; 1365 if (ja) *ja = tja; 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1372 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1373 { 1374 PetscErrorCode ierr; 1375 1376 PetscFunctionBegin; 1377 if (!ia) PetscFunctionReturn(0); 1378 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1379 ierr = PetscFree(*ia);CHKERRQ(ierr); 1380 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1387 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1388 { 1389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1390 PetscErrorCode ierr; 1391 1392 PetscFunctionBegin; 1393 #if defined(PETSC_USE_LOG) 1394 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1395 #endif 1396 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1397 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1398 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1399 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1400 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1401 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1402 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1403 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1404 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1405 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1406 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1407 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1408 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1409 1410 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1411 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1412 ierr = PetscFree(A->data);CHKERRQ(ierr); 1413 1414 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1415 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr); 1416 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1417 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1418 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr); 1419 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr); 1420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr); 1421 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1422 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1423 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr); 1424 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1430 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 switch (op) { 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 break; 1440 case MAT_KEEP_NONZERO_PATTERN: 1441 a->keepnonzeropattern = flg; 1442 break; 1443 case MAT_NEW_NONZERO_LOCATIONS: 1444 a->nonew = (flg ? 0 : 1); 1445 break; 1446 case MAT_NEW_NONZERO_LOCATION_ERR: 1447 a->nonew = (flg ? -1 : 0); 1448 break; 1449 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1450 a->nonew = (flg ? -2 : 0); 1451 break; 1452 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1453 a->nounused = (flg ? -1 : 0); 1454 break; 1455 case MAT_CHECK_COMPRESSED_ROW: 1456 a->compressedrow.check = flg; 1457 break; 1458 case MAT_NEW_DIAGONALS: 1459 case MAT_IGNORE_OFF_PROC_ENTRIES: 1460 case MAT_USE_HASH_TABLE: 1461 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1462 break; 1463 case MAT_SPD: 1464 case MAT_SYMMETRIC: 1465 case MAT_STRUCTURALLY_SYMMETRIC: 1466 case MAT_HERMITIAN: 1467 case MAT_SYMMETRY_ETERNAL: 1468 /* These options are handled directly by MatSetOption() */ 1469 break; 1470 default: 1471 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1478 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1479 { 1480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1481 PetscErrorCode ierr; 1482 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1483 MatScalar *aa,*aa_i; 1484 PetscScalar *v_i; 1485 1486 PetscFunctionBegin; 1487 bs = A->rmap->bs; 1488 ai = a->i; 1489 aj = a->j; 1490 aa = a->a; 1491 bs2 = a->bs2; 1492 1493 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1494 1495 bn = row/bs; /* Block number */ 1496 bp = row % bs; /* Block Position */ 1497 M = ai[bn+1] - ai[bn]; 1498 *nz = bs*M; 1499 1500 if (v) { 1501 *v = 0; 1502 if (*nz) { 1503 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1504 for (i=0; i<M; i++) { /* for each block in the block row */ 1505 v_i = *v + i*bs; 1506 aa_i = aa + bs2*(ai[bn] + i); 1507 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1508 } 1509 } 1510 } 1511 1512 if (idx) { 1513 *idx = 0; 1514 if (*nz) { 1515 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1516 for (i=0; i<M; i++) { /* for each block in the block row */ 1517 idx_i = *idx + i*bs; 1518 itmp = bs*aj[ai[bn] + i]; 1519 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1520 } 1521 } 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1528 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1529 { 1530 PetscErrorCode ierr; 1531 1532 PetscFunctionBegin; 1533 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1534 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1535 PetscFunctionReturn(0); 1536 } 1537 1538 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1542 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1543 { 1544 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1545 Mat C; 1546 PetscErrorCode ierr; 1547 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1548 PetscInt *rows,*cols,bs2=a->bs2; 1549 MatScalar *array; 1550 1551 PetscFunctionBegin; 1552 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1553 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1554 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1555 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1556 1557 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1558 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1559 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1560 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1561 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1562 ierr = PetscFree(col);CHKERRQ(ierr); 1563 } else { 1564 C = *B; 1565 } 1566 1567 array = a->a; 1568 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1569 for (i=0; i<mbs; i++) { 1570 cols[0] = i*bs; 1571 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1572 len = ai[i+1] - ai[i]; 1573 for (j=0; j<len; j++) { 1574 rows[0] = (*aj++)*bs; 1575 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1576 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1577 array += bs2; 1578 } 1579 } 1580 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1581 1582 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1584 1585 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1586 *B = C; 1587 } else { 1588 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1589 } 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1595 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1596 { 1597 PetscErrorCode ierr; 1598 Mat Btrans; 1599 1600 PetscFunctionBegin; 1601 *f = PETSC_FALSE; 1602 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1603 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1604 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1610 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1611 { 1612 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1613 PetscErrorCode ierr; 1614 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1615 int fd; 1616 PetscScalar *aa; 1617 FILE *file; 1618 1619 PetscFunctionBegin; 1620 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1621 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1622 col_lens[0] = MAT_FILE_CLASSID; 1623 1624 col_lens[1] = A->rmap->N; 1625 col_lens[2] = A->cmap->n; 1626 col_lens[3] = a->nz*bs2; 1627 1628 /* store lengths of each row and write (including header) to file */ 1629 count = 0; 1630 for (i=0; i<a->mbs; i++) { 1631 for (j=0; j<bs; j++) { 1632 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1633 } 1634 } 1635 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1636 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1637 1638 /* store column indices (zero start index) */ 1639 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1640 count = 0; 1641 for (i=0; i<a->mbs; i++) { 1642 for (j=0; j<bs; j++) { 1643 for (k=a->i[i]; k<a->i[i+1]; k++) { 1644 for (l=0; l<bs; l++) { 1645 jj[count++] = bs*a->j[k] + l; 1646 } 1647 } 1648 } 1649 } 1650 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1651 ierr = PetscFree(jj);CHKERRQ(ierr); 1652 1653 /* store nonzero values */ 1654 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1655 count = 0; 1656 for (i=0; i<a->mbs; i++) { 1657 for (j=0; j<bs; j++) { 1658 for (k=a->i[i]; k<a->i[i+1]; k++) { 1659 for (l=0; l<bs; l++) { 1660 aa[count++] = a->a[bs2*k + l*bs + j]; 1661 } 1662 } 1663 } 1664 } 1665 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1666 ierr = PetscFree(aa);CHKERRQ(ierr); 1667 1668 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1669 if (file) { 1670 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1671 } 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1677 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1678 { 1679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1680 PetscErrorCode ierr; 1681 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1682 PetscViewerFormat format; 1683 1684 PetscFunctionBegin; 1685 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1686 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1687 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1688 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1689 Mat aij; 1690 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1691 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1692 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1693 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1694 PetscFunctionReturn(0); 1695 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1696 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1697 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1698 for (i=0; i<a->mbs; i++) { 1699 for (j=0; j<bs; j++) { 1700 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1701 for (k=a->i[i]; k<a->i[i+1]; k++) { 1702 for (l=0; l<bs; l++) { 1703 #if defined(PETSC_USE_COMPLEX) 1704 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1705 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1706 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1707 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1708 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1709 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1710 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1711 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1712 } 1713 #else 1714 if (a->a[bs2*k + l*bs + j] != 0.0) { 1715 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1716 } 1717 #endif 1718 } 1719 } 1720 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1721 } 1722 } 1723 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1724 } else { 1725 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1726 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1727 for (i=0; i<a->mbs; i++) { 1728 for (j=0; j<bs; j++) { 1729 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1730 for (k=a->i[i]; k<a->i[i+1]; k++) { 1731 for (l=0; l<bs; l++) { 1732 #if defined(PETSC_USE_COMPLEX) 1733 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1734 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1735 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1736 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1737 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1738 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1739 } else { 1740 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1741 } 1742 #else 1743 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1744 #endif 1745 } 1746 } 1747 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1748 } 1749 } 1750 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1751 } 1752 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #include <petscdraw.h> 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1759 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1760 { 1761 Mat A = (Mat) Aa; 1762 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1763 PetscErrorCode ierr; 1764 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1765 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1766 MatScalar *aa; 1767 PetscViewer viewer; 1768 PetscViewerFormat format; 1769 1770 PetscFunctionBegin; 1771 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1772 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1773 1774 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1775 1776 /* loop over matrix elements drawing boxes */ 1777 1778 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1779 color = PETSC_DRAW_BLUE; 1780 for (i=0,row=0; i<mbs; i++,row+=bs) { 1781 for (j=a->i[i]; j<a->i[i+1]; j++) { 1782 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1783 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1784 aa = a->a + j*bs2; 1785 for (k=0; k<bs; k++) { 1786 for (l=0; l<bs; l++) { 1787 if (PetscRealPart(*aa++) >= 0.) continue; 1788 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1789 } 1790 } 1791 } 1792 } 1793 color = PETSC_DRAW_CYAN; 1794 for (i=0,row=0; i<mbs; i++,row+=bs) { 1795 for (j=a->i[i]; j<a->i[i+1]; j++) { 1796 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1797 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1798 aa = a->a + j*bs2; 1799 for (k=0; k<bs; k++) { 1800 for (l=0; l<bs; l++) { 1801 if (PetscRealPart(*aa++) != 0.) continue; 1802 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1803 } 1804 } 1805 } 1806 } 1807 color = PETSC_DRAW_RED; 1808 for (i=0,row=0; i<mbs; i++,row+=bs) { 1809 for (j=a->i[i]; j<a->i[i+1]; j++) { 1810 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1811 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1812 aa = a->a + j*bs2; 1813 for (k=0; k<bs; k++) { 1814 for (l=0; l<bs; l++) { 1815 if (PetscRealPart(*aa++) <= 0.) continue; 1816 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1817 } 1818 } 1819 } 1820 } 1821 } else { 1822 /* use contour shading to indicate magnitude of values */ 1823 /* first determine max of all nonzero values */ 1824 PetscDraw popup; 1825 PetscReal scale,maxv = 0.0; 1826 1827 for (i=0; i<a->nz*a->bs2; i++) { 1828 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1829 } 1830 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1831 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1832 if (popup) { 1833 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1834 } 1835 for (i=0,row=0; i<mbs; i++,row+=bs) { 1836 for (j=a->i[i]; j<a->i[i+1]; j++) { 1837 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1838 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1839 aa = a->a + j*bs2; 1840 for (k=0; k<bs; k++) { 1841 for (l=0; l<bs; l++) { 1842 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1843 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1844 } 1845 } 1846 } 1847 } 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1854 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1855 { 1856 PetscErrorCode ierr; 1857 PetscReal xl,yl,xr,yr,w,h; 1858 PetscDraw draw; 1859 PetscBool isnull; 1860 1861 PetscFunctionBegin; 1862 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1863 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1864 1865 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1866 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1867 xr += w; yr += h; xl = -w; yl = -h; 1868 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1869 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1870 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatView_SeqBAIJ" 1876 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1877 { 1878 PetscErrorCode ierr; 1879 PetscBool iascii,isbinary,isdraw; 1880 1881 PetscFunctionBegin; 1882 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1883 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1884 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1885 if (iascii) { 1886 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1887 } else if (isbinary) { 1888 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1889 } else if (isdraw) { 1890 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1891 } else { 1892 Mat B; 1893 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1894 ierr = MatView(B,viewer);CHKERRQ(ierr); 1895 ierr = MatDestroy(&B);CHKERRQ(ierr); 1896 } 1897 PetscFunctionReturn(0); 1898 } 1899 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1903 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1904 { 1905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1906 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1907 PetscInt *ai = a->i,*ailen = a->ilen; 1908 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1909 MatScalar *ap,*aa = a->a; 1910 1911 PetscFunctionBegin; 1912 for (k=0; k<m; k++) { /* loop over rows */ 1913 row = im[k]; brow = row/bs; 1914 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1915 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1916 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1917 nrow = ailen[brow]; 1918 for (l=0; l<n; l++) { /* loop over columns */ 1919 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1920 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1921 col = in[l]; 1922 bcol = col/bs; 1923 cidx = col%bs; 1924 ridx = row%bs; 1925 high = nrow; 1926 low = 0; /* assume unsorted */ 1927 while (high-low > 5) { 1928 t = (low+high)/2; 1929 if (rp[t] > bcol) high = t; 1930 else low = t; 1931 } 1932 for (i=low; i<high; i++) { 1933 if (rp[i] > bcol) break; 1934 if (rp[i] == bcol) { 1935 *v++ = ap[bs2*i+bs*cidx+ridx]; 1936 goto finished; 1937 } 1938 } 1939 *v++ = 0.0; 1940 finished:; 1941 } 1942 } 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1948 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1949 { 1950 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1951 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1952 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1953 PetscErrorCode ierr; 1954 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1955 PetscBool roworiented=a->roworiented; 1956 const PetscScalar *value = v; 1957 MatScalar *ap,*aa = a->a,*bap; 1958 1959 PetscFunctionBegin; 1960 if (roworiented) { 1961 stepval = (n-1)*bs; 1962 } else { 1963 stepval = (m-1)*bs; 1964 } 1965 for (k=0; k<m; k++) { /* loop over added rows */ 1966 row = im[k]; 1967 if (row < 0) continue; 1968 #if defined(PETSC_USE_DEBUG) 1969 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1970 #endif 1971 rp = aj + ai[row]; 1972 ap = aa + bs2*ai[row]; 1973 rmax = imax[row]; 1974 nrow = ailen[row]; 1975 low = 0; 1976 high = nrow; 1977 for (l=0; l<n; l++) { /* loop over added columns */ 1978 if (in[l] < 0) continue; 1979 #if defined(PETSC_USE_DEBUG) 1980 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1981 #endif 1982 col = in[l]; 1983 if (roworiented) { 1984 value = v + (k*(stepval+bs) + l)*bs; 1985 } else { 1986 value = v + (l*(stepval+bs) + k)*bs; 1987 } 1988 if (col <= lastcol) low = 0; 1989 else high = nrow; 1990 lastcol = col; 1991 while (high-low > 7) { 1992 t = (low+high)/2; 1993 if (rp[t] > col) high = t; 1994 else low = t; 1995 } 1996 for (i=low; i<high; i++) { 1997 if (rp[i] > col) break; 1998 if (rp[i] == col) { 1999 bap = ap + bs2*i; 2000 if (roworiented) { 2001 if (is == ADD_VALUES) { 2002 for (ii=0; ii<bs; ii++,value+=stepval) { 2003 for (jj=ii; jj<bs2; jj+=bs) { 2004 bap[jj] += *value++; 2005 } 2006 } 2007 } else { 2008 for (ii=0; ii<bs; ii++,value+=stepval) { 2009 for (jj=ii; jj<bs2; jj+=bs) { 2010 bap[jj] = *value++; 2011 } 2012 } 2013 } 2014 } else { 2015 if (is == ADD_VALUES) { 2016 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2017 for (jj=0; jj<bs; jj++) { 2018 bap[jj] += value[jj]; 2019 } 2020 bap += bs; 2021 } 2022 } else { 2023 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2024 for (jj=0; jj<bs; jj++) { 2025 bap[jj] = value[jj]; 2026 } 2027 bap += bs; 2028 } 2029 } 2030 } 2031 goto noinsert2; 2032 } 2033 } 2034 if (nonew == 1) goto noinsert2; 2035 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2036 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2037 N = nrow++ - 1; high++; 2038 /* shift up all the later entries in this row */ 2039 for (ii=N; ii>=i; ii--) { 2040 rp[ii+1] = rp[ii]; 2041 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2042 } 2043 if (N >= i) { 2044 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2045 } 2046 rp[i] = col; 2047 bap = ap + bs2*i; 2048 if (roworiented) { 2049 for (ii=0; ii<bs; ii++,value+=stepval) { 2050 for (jj=ii; jj<bs2; jj+=bs) { 2051 bap[jj] = *value++; 2052 } 2053 } 2054 } else { 2055 for (ii=0; ii<bs; ii++,value+=stepval) { 2056 for (jj=0; jj<bs; jj++) { 2057 *bap++ = *value++; 2058 } 2059 } 2060 } 2061 noinsert2:; 2062 low = i; 2063 } 2064 ailen[row] = nrow; 2065 } 2066 PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2071 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2072 { 2073 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2074 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2075 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2076 PetscErrorCode ierr; 2077 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2078 MatScalar *aa = a->a,*ap; 2079 PetscReal ratio=0.6; 2080 2081 PetscFunctionBegin; 2082 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2083 2084 if (m) rmax = ailen[0]; 2085 for (i=1; i<mbs; i++) { 2086 /* move each row back by the amount of empty slots (fshift) before it*/ 2087 fshift += imax[i-1] - ailen[i-1]; 2088 rmax = PetscMax(rmax,ailen[i]); 2089 if (fshift) { 2090 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2091 N = ailen[i]; 2092 for (j=0; j<N; j++) { 2093 ip[j-fshift] = ip[j]; 2094 2095 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2096 } 2097 } 2098 ai[i] = ai[i-1] + ailen[i-1]; 2099 } 2100 if (mbs) { 2101 fshift += imax[mbs-1] - ailen[mbs-1]; 2102 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2103 } 2104 /* reset ilen and imax for each row */ 2105 for (i=0; i<mbs; i++) { 2106 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2107 } 2108 a->nz = ai[mbs]; 2109 2110 /* diagonals may have moved, so kill the diagonal pointers */ 2111 a->idiagvalid = PETSC_FALSE; 2112 if (fshift && a->diag) { 2113 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2114 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2115 a->diag = 0; 2116 } 2117 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2118 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2119 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2120 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2121 2122 A->info.mallocs += a->reallocs; 2123 a->reallocs = 0; 2124 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2125 2126 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2127 2128 A->same_nonzero = PETSC_TRUE; 2129 PetscFunctionReturn(0); 2130 } 2131 2132 /* 2133 This function returns an array of flags which indicate the locations of contiguous 2134 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2135 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2136 Assume: sizes should be long enough to hold all the values. 2137 */ 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2140 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2141 { 2142 PetscInt i,j,k,row; 2143 PetscBool flg; 2144 2145 PetscFunctionBegin; 2146 for (i=0,j=0; i<n; j++) { 2147 row = idx[i]; 2148 if (row%bs!=0) { /* Not the begining of a block */ 2149 sizes[j] = 1; 2150 i++; 2151 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2152 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2153 i++; 2154 } else { /* Begining of the block, so check if the complete block exists */ 2155 flg = PETSC_TRUE; 2156 for (k=1; k<bs; k++) { 2157 if (row+k != idx[i+k]) { /* break in the block */ 2158 flg = PETSC_FALSE; 2159 break; 2160 } 2161 } 2162 if (flg) { /* No break in the bs */ 2163 sizes[j] = bs; 2164 i += bs; 2165 } else { 2166 sizes[j] = 1; 2167 i++; 2168 } 2169 } 2170 } 2171 *bs_max = j; 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2177 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2178 { 2179 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2180 PetscErrorCode ierr; 2181 PetscInt i,j,k,count,*rows; 2182 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2183 PetscScalar zero = 0.0; 2184 MatScalar *aa; 2185 const PetscScalar *xx; 2186 PetscScalar *bb; 2187 2188 PetscFunctionBegin; 2189 /* fix right hand side if needed */ 2190 if (x && b) { 2191 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2192 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2193 for (i=0; i<is_n; i++) { 2194 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2195 } 2196 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2197 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2198 } 2199 2200 /* Make a copy of the IS and sort it */ 2201 /* allocate memory for rows,sizes */ 2202 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2203 2204 /* copy IS values to rows, and sort them */ 2205 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2206 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2207 2208 if (baij->keepnonzeropattern) { 2209 for (i=0; i<is_n; i++) sizes[i] = 1; 2210 bs_max = is_n; 2211 A->same_nonzero = PETSC_TRUE; 2212 } else { 2213 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2214 2215 A->same_nonzero = PETSC_FALSE; 2216 } 2217 2218 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2219 row = rows[j]; 2220 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2221 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2222 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2223 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2224 if (diag != (PetscScalar)0.0) { 2225 if (baij->ilen[row/bs] > 0) { 2226 baij->ilen[row/bs] = 1; 2227 baij->j[baij->i[row/bs]] = row/bs; 2228 2229 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2230 } 2231 /* Now insert all the diagonal values for this bs */ 2232 for (k=0; k<bs; k++) { 2233 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2234 } 2235 } else { /* (diag == 0.0) */ 2236 baij->ilen[row/bs] = 0; 2237 } /* end (diag == 0.0) */ 2238 } else { /* (sizes[i] != bs) */ 2239 #if defined(PETSC_USE_DEBUG) 2240 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2241 #endif 2242 for (k=0; k<count; k++) { 2243 aa[0] = zero; 2244 aa += bs; 2245 } 2246 if (diag != (PetscScalar)0.0) { 2247 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2248 } 2249 } 2250 } 2251 2252 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2253 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2259 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2260 { 2261 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2262 PetscErrorCode ierr; 2263 PetscInt i,j,k,count; 2264 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2265 PetscScalar zero = 0.0; 2266 MatScalar *aa; 2267 const PetscScalar *xx; 2268 PetscScalar *bb; 2269 PetscBool *zeroed,vecs = PETSC_FALSE; 2270 2271 PetscFunctionBegin; 2272 /* fix right hand side if needed */ 2273 if (x && b) { 2274 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2275 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2276 vecs = PETSC_TRUE; 2277 } 2278 A->same_nonzero = PETSC_TRUE; 2279 2280 /* zero the columns */ 2281 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2282 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2283 for (i=0; i<is_n; i++) { 2284 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2285 zeroed[is_idx[i]] = PETSC_TRUE; 2286 } 2287 for (i=0; i<A->rmap->N; i++) { 2288 if (!zeroed[i]) { 2289 row = i/bs; 2290 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2291 for (k=0; k<bs; k++) { 2292 col = bs*baij->j[j] + k; 2293 if (zeroed[col]) { 2294 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2295 if (vecs) bb[i] -= aa[0]*xx[col]; 2296 aa[0] = 0.0; 2297 } 2298 } 2299 } 2300 } else if (vecs) bb[i] = diag*xx[i]; 2301 } 2302 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2303 if (vecs) { 2304 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2305 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2306 } 2307 2308 /* zero the rows */ 2309 for (i=0; i<is_n; i++) { 2310 row = is_idx[i]; 2311 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2312 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2313 for (k=0; k<count; k++) { 2314 aa[0] = zero; 2315 aa += bs; 2316 } 2317 if (diag != (PetscScalar)0.0) { 2318 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2319 } 2320 } 2321 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2327 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2328 { 2329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2330 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2331 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2332 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2333 PetscErrorCode ierr; 2334 PetscInt ridx,cidx,bs2=a->bs2; 2335 PetscBool roworiented=a->roworiented; 2336 MatScalar *ap,value,*aa=a->a,*bap; 2337 2338 PetscFunctionBegin; 2339 if (v) PetscValidScalarPointer(v,6); 2340 for (k=0; k<m; k++) { /* loop over added rows */ 2341 row = im[k]; 2342 brow = row/bs; 2343 if (row < 0) continue; 2344 #if defined(PETSC_USE_DEBUG) 2345 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2346 #endif 2347 rp = aj + ai[brow]; 2348 ap = aa + bs2*ai[brow]; 2349 rmax = imax[brow]; 2350 nrow = ailen[brow]; 2351 low = 0; 2352 high = nrow; 2353 for (l=0; l<n; l++) { /* loop over added columns */ 2354 if (in[l] < 0) continue; 2355 #if defined(PETSC_USE_DEBUG) 2356 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2357 #endif 2358 col = in[l]; bcol = col/bs; 2359 ridx = row % bs; cidx = col % bs; 2360 if (roworiented) { 2361 value = v[l + k*n]; 2362 } else { 2363 value = v[k + l*m]; 2364 } 2365 if (col <= lastcol) low = 0; else high = nrow; 2366 lastcol = col; 2367 while (high-low > 7) { 2368 t = (low+high)/2; 2369 if (rp[t] > bcol) high = t; 2370 else low = t; 2371 } 2372 for (i=low; i<high; i++) { 2373 if (rp[i] > bcol) break; 2374 if (rp[i] == bcol) { 2375 bap = ap + bs2*i + bs*cidx + ridx; 2376 if (is == ADD_VALUES) *bap += value; 2377 else *bap = value; 2378 goto noinsert1; 2379 } 2380 } 2381 if (nonew == 1) goto noinsert1; 2382 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2383 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2384 N = nrow++ - 1; high++; 2385 /* shift up all the later entries in this row */ 2386 for (ii=N; ii>=i; ii--) { 2387 rp[ii+1] = rp[ii]; 2388 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2389 } 2390 if (N>=i) { 2391 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2392 } 2393 rp[i] = bcol; 2394 ap[bs2*i + bs*cidx + ridx] = value; 2395 a->nz++; 2396 noinsert1:; 2397 low = i; 2398 } 2399 ailen[brow] = nrow; 2400 } 2401 A->same_nonzero = PETSC_FALSE; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2407 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2408 { 2409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2410 Mat outA; 2411 PetscErrorCode ierr; 2412 PetscBool row_identity,col_identity; 2413 2414 PetscFunctionBegin; 2415 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2416 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2417 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2418 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2419 2420 outA = inA; 2421 inA->factortype = MAT_FACTOR_LU; 2422 2423 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2424 2425 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2426 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2427 a->row = row; 2428 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2429 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2430 a->col = col; 2431 2432 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2433 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2434 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2435 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2436 2437 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2438 if (!a->solve_work) { 2439 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2440 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2441 } 2442 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2448 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2449 { 2450 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2451 PetscInt i,nz,mbs; 2452 2453 PetscFunctionBegin; 2454 nz = baij->maxnz; 2455 mbs = baij->mbs; 2456 for (i=0; i<nz; i++) { 2457 baij->j[i] = indices[i]; 2458 } 2459 baij->nz = nz; 2460 for (i=0; i<mbs; i++) { 2461 baij->ilen[i] = baij->imax[i]; 2462 } 2463 PetscFunctionReturn(0); 2464 } 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2468 /*@ 2469 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2470 in the matrix. 2471 2472 Input Parameters: 2473 + mat - the SeqBAIJ matrix 2474 - indices - the column indices 2475 2476 Level: advanced 2477 2478 Notes: 2479 This can be called if you have precomputed the nonzero structure of the 2480 matrix and want to provide it to the matrix object to improve the performance 2481 of the MatSetValues() operation. 2482 2483 You MUST have set the correct numbers of nonzeros per row in the call to 2484 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2485 2486 MUST be called before any calls to MatSetValues(); 2487 2488 @*/ 2489 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2490 { 2491 PetscErrorCode ierr; 2492 2493 PetscFunctionBegin; 2494 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2495 PetscValidPointer(indices,2); 2496 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2502 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2503 { 2504 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2505 PetscErrorCode ierr; 2506 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2507 PetscReal atmp; 2508 PetscScalar *x,zero = 0.0; 2509 MatScalar *aa; 2510 PetscInt ncols,brow,krow,kcol; 2511 2512 PetscFunctionBegin; 2513 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2514 bs = A->rmap->bs; 2515 aa = a->a; 2516 ai = a->i; 2517 aj = a->j; 2518 mbs = a->mbs; 2519 2520 ierr = VecSet(v,zero);CHKERRQ(ierr); 2521 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2522 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2523 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2524 for (i=0; i<mbs; i++) { 2525 ncols = ai[1] - ai[0]; ai++; 2526 brow = bs*i; 2527 for (j=0; j<ncols; j++) { 2528 for (kcol=0; kcol<bs; kcol++) { 2529 for (krow=0; krow<bs; krow++) { 2530 atmp = PetscAbsScalar(*aa);aa++; 2531 row = brow + krow; /* row index */ 2532 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2533 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2534 } 2535 } 2536 aj++; 2537 } 2538 } 2539 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 #undef __FUNCT__ 2544 #define __FUNCT__ "MatCopy_SeqBAIJ" 2545 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2546 { 2547 PetscErrorCode ierr; 2548 2549 PetscFunctionBegin; 2550 /* If the two matrices have the same copy implementation, use fast copy. */ 2551 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2553 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2554 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2555 2556 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2557 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2558 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2559 } else { 2560 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2561 } 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2567 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2568 { 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #undef __FUNCT__ 2577 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2578 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2579 { 2580 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2581 2582 PetscFunctionBegin; 2583 *array = a->a; 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2589 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2590 { 2591 PetscFunctionBegin; 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2597 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2598 { 2599 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2600 PetscErrorCode ierr; 2601 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2602 PetscBLASInt one=1; 2603 2604 PetscFunctionBegin; 2605 if (str == SAME_NONZERO_PATTERN) { 2606 PetscScalar alpha = a; 2607 PetscBLASInt bnz; 2608 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2609 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2610 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2611 if (y->xtoy && y->XtoY != X) { 2612 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2613 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2614 } 2615 if (!y->xtoy) { /* get xtoy */ 2616 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2617 y->XtoY = X; 2618 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2619 } 2620 for (i=0; i<x->nz; i++) { 2621 j = 0; 2622 while (j < bs2) { 2623 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2624 j++; 2625 } 2626 } 2627 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2628 } else { 2629 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2630 } 2631 PetscFunctionReturn(0); 2632 } 2633 2634 #undef __FUNCT__ 2635 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2636 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2637 { 2638 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2639 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2640 MatScalar *aa = a->a; 2641 2642 PetscFunctionBegin; 2643 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2649 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2650 { 2651 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2652 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2653 MatScalar *aa = a->a; 2654 2655 PetscFunctionBegin; 2656 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2657 PetscFunctionReturn(0); 2658 } 2659 2660 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2664 /* 2665 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2666 */ 2667 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2668 { 2669 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2670 PetscErrorCode ierr; 2671 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2672 PetscInt nz = a->i[m],row,*jj,mr,col; 2673 2674 PetscFunctionBegin; 2675 *nn = n; 2676 if (!ia) PetscFunctionReturn(0); 2677 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2678 else { 2679 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2680 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2681 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2682 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2683 jj = a->j; 2684 for (i=0; i<nz; i++) { 2685 collengths[jj[i]]++; 2686 } 2687 cia[0] = oshift; 2688 for (i=0; i<n; i++) { 2689 cia[i+1] = cia[i] + collengths[i]; 2690 } 2691 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2692 jj = a->j; 2693 for (row=0; row<m; row++) { 2694 mr = a->i[row+1] - a->i[row]; 2695 for (i=0; i<mr; i++) { 2696 col = *jj++; 2697 2698 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2699 } 2700 } 2701 ierr = PetscFree(collengths);CHKERRQ(ierr); 2702 *ia = cia; *ja = cja; 2703 } 2704 PetscFunctionReturn(0); 2705 } 2706 2707 #undef __FUNCT__ 2708 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2709 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2710 { 2711 PetscErrorCode ierr; 2712 2713 PetscFunctionBegin; 2714 if (!ia) PetscFunctionReturn(0); 2715 ierr = PetscFree(*ia);CHKERRQ(ierr); 2716 ierr = PetscFree(*ja);CHKERRQ(ierr); 2717 PetscFunctionReturn(0); 2718 } 2719 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2722 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2723 { 2724 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2725 PetscErrorCode ierr; 2726 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2727 PetscScalar dx,*y,*xx,*w3_array; 2728 PetscScalar *vscale_array; 2729 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2730 Vec w1 = coloring->w1,w2=coloring->w2,w3; 2731 void *fctx = coloring->fctx; 2732 PetscBool flg = PETSC_FALSE; 2733 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2734 Vec x1_tmp; 2735 2736 PetscFunctionBegin; 2737 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2738 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2739 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2740 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2741 2742 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2743 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2744 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 2745 if (flg) { 2746 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2747 } else { 2748 PetscBool assembled; 2749 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2750 if (assembled) { 2751 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2752 } 2753 } 2754 2755 x1_tmp = x1; 2756 if (!coloring->vscale) { 2757 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2758 } 2759 2760 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2761 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2762 } 2763 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2764 2765 /* Set w1 = F(x1) */ 2766 if (!coloring->fset) { 2767 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2768 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2769 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2770 } else { 2771 coloring->fset = PETSC_FALSE; 2772 } 2773 2774 if (!coloring->w3) { 2775 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2776 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2777 } 2778 w3 = coloring->w3; 2779 2780 CHKMEMQ; 2781 /* Compute all the local scale factors, including ghost points */ 2782 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2783 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2784 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2785 if (ctype == IS_COLORING_GHOSTED) { 2786 col_start = 0; col_end = N; 2787 } else if (ctype == IS_COLORING_GLOBAL) { 2788 xx = xx - start; 2789 vscale_array = vscale_array - start; 2790 col_start = start; col_end = N + start; 2791 } 2792 CHKMEMQ; 2793 for (col=col_start; col<col_end; col++) { 2794 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2795 if (coloring->htype[0] == 'w') { 2796 dx = 1.0 + unorm; 2797 } else { 2798 dx = xx[col]; 2799 } 2800 if (dx == (PetscScalar)0.0) dx = 1.0; 2801 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2802 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2803 dx *= epsilon; 2804 vscale_array[col] = (PetscScalar)1.0/dx; 2805 } CHKMEMQ; 2806 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2807 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2808 if (ctype == IS_COLORING_GLOBAL) { 2809 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2810 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2811 } 2812 CHKMEMQ; 2813 if (coloring->vscaleforrow) { 2814 vscaleforrow = coloring->vscaleforrow; 2815 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2816 2817 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2818 /* 2819 Loop over each color 2820 */ 2821 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2822 for (k=0; k<coloring->ncolors; k++) { 2823 coloring->currentcolor = k; 2824 for (i=0; i<bs; i++) { 2825 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2826 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2827 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2828 /* 2829 Loop over each column associated with color 2830 adding the perturbation to the vector w3. 2831 */ 2832 for (l=0; l<coloring->ncolumns[k]; l++) { 2833 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2834 if (coloring->htype[0] == 'w') { 2835 dx = 1.0 + unorm; 2836 } else { 2837 dx = xx[col]; 2838 } 2839 if (dx == (PetscScalar)0.0) dx = 1.0; 2840 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2841 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2842 dx *= epsilon; 2843 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2844 w3_array[col] += dx; 2845 } 2846 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2847 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2848 2849 /* 2850 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2851 w2 = F(x1 + dx) - F(x1) 2852 */ 2853 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2854 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2855 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2856 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2857 2858 /* 2859 Loop over rows of vector, putting results into Jacobian matrix 2860 */ 2861 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2862 for (l=0; l<coloring->nrows[k]; l++) { 2863 row = bs*coloring->rows[k][l]; /* local row index */ 2864 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2865 for (j=0; j<bs; j++) { 2866 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2867 srows[j] = row + start + j; 2868 } 2869 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2870 } 2871 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2872 } 2873 } /* endof for each color */ 2874 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2875 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2876 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2877 ierr = PetscFree(srows);CHKERRQ(ierr); 2878 2879 coloring->currentcolor = -1; 2880 2881 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2882 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2883 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2884 PetscFunctionReturn(0); 2885 } 2886 2887 /* -------------------------------------------------------------------*/ 2888 static struct _MatOps MatOps_Values = { 2889 MatSetValues_SeqBAIJ, 2890 MatGetRow_SeqBAIJ, 2891 MatRestoreRow_SeqBAIJ, 2892 MatMult_SeqBAIJ_N, 2893 /* 4*/ MatMultAdd_SeqBAIJ_N, 2894 MatMultTranspose_SeqBAIJ, 2895 MatMultTransposeAdd_SeqBAIJ, 2896 0, 2897 0, 2898 0, 2899 /* 10*/ 0, 2900 MatLUFactor_SeqBAIJ, 2901 0, 2902 0, 2903 MatTranspose_SeqBAIJ, 2904 /* 15*/ MatGetInfo_SeqBAIJ, 2905 MatEqual_SeqBAIJ, 2906 MatGetDiagonal_SeqBAIJ, 2907 MatDiagonalScale_SeqBAIJ, 2908 MatNorm_SeqBAIJ, 2909 /* 20*/ 0, 2910 MatAssemblyEnd_SeqBAIJ, 2911 MatSetOption_SeqBAIJ, 2912 MatZeroEntries_SeqBAIJ, 2913 /* 24*/ MatZeroRows_SeqBAIJ, 2914 0, 2915 0, 2916 0, 2917 0, 2918 /* 29*/ MatSetUp_SeqBAIJ, 2919 0, 2920 0, 2921 0, 2922 0, 2923 /* 34*/ MatDuplicate_SeqBAIJ, 2924 0, 2925 0, 2926 MatILUFactor_SeqBAIJ, 2927 0, 2928 /* 39*/ MatAXPY_SeqBAIJ, 2929 MatGetSubMatrices_SeqBAIJ, 2930 MatIncreaseOverlap_SeqBAIJ, 2931 MatGetValues_SeqBAIJ, 2932 MatCopy_SeqBAIJ, 2933 /* 44*/ 0, 2934 MatScale_SeqBAIJ, 2935 0, 2936 0, 2937 MatZeroRowsColumns_SeqBAIJ, 2938 /* 49*/ 0, 2939 MatGetRowIJ_SeqBAIJ, 2940 MatRestoreRowIJ_SeqBAIJ, 2941 MatGetColumnIJ_SeqBAIJ, 2942 MatRestoreColumnIJ_SeqBAIJ, 2943 /* 54*/ MatFDColoringCreate_SeqAIJ, 2944 0, 2945 0, 2946 0, 2947 MatSetValuesBlocked_SeqBAIJ, 2948 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2949 MatDestroy_SeqBAIJ, 2950 MatView_SeqBAIJ, 2951 0, 2952 0, 2953 /* 64*/ 0, 2954 0, 2955 0, 2956 0, 2957 0, 2958 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2959 0, 2960 MatConvert_Basic, 2961 0, 2962 0, 2963 /* 74*/ 0, 2964 MatFDColoringApply_BAIJ, 2965 0, 2966 0, 2967 0, 2968 /* 79*/ 0, 2969 0, 2970 0, 2971 0, 2972 MatLoad_SeqBAIJ, 2973 /* 84*/ 0, 2974 0, 2975 0, 2976 0, 2977 0, 2978 /* 89*/ 0, 2979 0, 2980 0, 2981 0, 2982 0, 2983 /* 94*/ 0, 2984 0, 2985 0, 2986 0, 2987 0, 2988 /* 99*/ 0, 2989 0, 2990 0, 2991 0, 2992 0, 2993 /*104*/ 0, 2994 MatRealPart_SeqBAIJ, 2995 MatImaginaryPart_SeqBAIJ, 2996 0, 2997 0, 2998 /*109*/ 0, 2999 0, 3000 0, 3001 0, 3002 MatMissingDiagonal_SeqBAIJ, 3003 /*114*/ 0, 3004 0, 3005 0, 3006 0, 3007 0, 3008 /*119*/ 0, 3009 0, 3010 MatMultHermitianTranspose_SeqBAIJ, 3011 MatMultHermitianTransposeAdd_SeqBAIJ, 3012 0, 3013 /*124*/ 0, 3014 0, 3015 MatInvertBlockDiagonal_SeqBAIJ 3016 }; 3017 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3020 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3021 { 3022 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3023 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3028 3029 /* allocate space for values if not already there */ 3030 if (!aij->saved_values) { 3031 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3032 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3033 } 3034 3035 /* copy values over */ 3036 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3037 PetscFunctionReturn(0); 3038 } 3039 3040 #undef __FUNCT__ 3041 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3042 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3043 { 3044 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3045 PetscErrorCode ierr; 3046 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3047 3048 PetscFunctionBegin; 3049 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3050 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3051 3052 /* copy values over */ 3053 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 PETSC_EXTERN_C PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3058 PETSC_EXTERN_C PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3062 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3063 { 3064 Mat_SeqBAIJ *b; 3065 PetscErrorCode ierr; 3066 PetscInt i,mbs,nbs,bs2; 3067 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3068 3069 PetscFunctionBegin; 3070 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3071 if (nz == MAT_SKIP_ALLOCATION) { 3072 skipallocation = PETSC_TRUE; 3073 nz = 0; 3074 } 3075 3076 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3077 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3078 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3079 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3080 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3081 3082 B->preallocated = PETSC_TRUE; 3083 3084 mbs = B->rmap->n/bs; 3085 nbs = B->cmap->n/bs; 3086 bs2 = bs*bs; 3087 3088 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3089 3090 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3091 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3092 if (nnz) { 3093 for (i=0; i<mbs; i++) { 3094 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3095 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3096 } 3097 } 3098 3099 b = (Mat_SeqBAIJ*)B->data; 3100 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3101 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3102 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3103 3104 if (!flg) { 3105 switch (bs) { 3106 case 1: 3107 B->ops->mult = MatMult_SeqBAIJ_1; 3108 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3109 B->ops->sor = MatSOR_SeqBAIJ_1; 3110 break; 3111 case 2: 3112 B->ops->mult = MatMult_SeqBAIJ_2; 3113 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3114 B->ops->sor = MatSOR_SeqBAIJ_2; 3115 break; 3116 case 3: 3117 B->ops->mult = MatMult_SeqBAIJ_3; 3118 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3119 B->ops->sor = MatSOR_SeqBAIJ_3; 3120 break; 3121 case 4: 3122 B->ops->mult = MatMult_SeqBAIJ_4; 3123 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3124 B->ops->sor = MatSOR_SeqBAIJ_4; 3125 break; 3126 case 5: 3127 B->ops->mult = MatMult_SeqBAIJ_5; 3128 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3129 B->ops->sor = MatSOR_SeqBAIJ_5; 3130 break; 3131 case 6: 3132 B->ops->mult = MatMult_SeqBAIJ_6; 3133 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3134 B->ops->sor = MatSOR_SeqBAIJ_6; 3135 break; 3136 case 7: 3137 B->ops->mult = MatMult_SeqBAIJ_7; 3138 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3139 B->ops->sor = MatSOR_SeqBAIJ_7; 3140 break; 3141 case 15: 3142 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3143 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3144 B->ops->sor = MatSOR_SeqBAIJ_N; 3145 break; 3146 default: 3147 B->ops->mult = MatMult_SeqBAIJ_N; 3148 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3149 B->ops->sor = MatSOR_SeqBAIJ_N; 3150 break; 3151 } 3152 } 3153 b->mbs = mbs; 3154 b->nbs = nbs; 3155 if (!skipallocation) { 3156 if (!b->imax) { 3157 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3158 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3159 3160 b->free_imax_ilen = PETSC_TRUE; 3161 } 3162 /* b->ilen will count nonzeros in each block row so far. */ 3163 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3164 if (!nnz) { 3165 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3166 else if (nz < 0) nz = 1; 3167 for (i=0; i<mbs; i++) b->imax[i] = nz; 3168 nz = nz*mbs; 3169 } else { 3170 nz = 0; 3171 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3172 } 3173 3174 /* allocate the matrix space */ 3175 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3176 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3177 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3178 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3179 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3180 3181 b->singlemalloc = PETSC_TRUE; 3182 b->i[0] = 0; 3183 for (i=1; i<mbs+1; i++) { 3184 b->i[i] = b->i[i-1] + b->imax[i-1]; 3185 } 3186 b->free_a = PETSC_TRUE; 3187 b->free_ij = PETSC_TRUE; 3188 } else { 3189 b->free_a = PETSC_FALSE; 3190 b->free_ij = PETSC_FALSE; 3191 } 3192 3193 b->bs2 = bs2; 3194 b->mbs = mbs; 3195 b->nz = 0; 3196 b->maxnz = nz; 3197 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3198 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3199 PetscFunctionReturn(0); 3200 } 3201 3202 #undef __FUNCT__ 3203 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3204 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3205 { 3206 PetscInt i,m,nz,nz_max=0,*nnz; 3207 PetscScalar *values=0; 3208 PetscErrorCode ierr; 3209 3210 PetscFunctionBegin; 3211 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3212 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3213 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3214 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3215 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3216 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3217 m = B->rmap->n/bs; 3218 3219 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3220 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3221 for (i=0; i<m; i++) { 3222 nz = ii[i+1]- ii[i]; 3223 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3224 nz_max = PetscMax(nz_max, nz); 3225 nnz[i] = nz; 3226 } 3227 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3228 ierr = PetscFree(nnz);CHKERRQ(ierr); 3229 3230 values = (PetscScalar*)V; 3231 if (!values) { 3232 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3233 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3234 } 3235 for (i=0; i<m; i++) { 3236 PetscInt ncols = ii[i+1] - ii[i]; 3237 const PetscInt *icols = jj + ii[i]; 3238 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3239 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3240 } 3241 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3242 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3243 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3244 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3245 PetscFunctionReturn(0); 3246 } 3247 3248 PETSC_EXTERN_C PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3249 PETSC_EXTERN_C PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3250 #if defined(PETSC_HAVE_MUMPS) 3251 PETSC_EXTERN_C PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3252 #endif 3253 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 3254 3255 /*MC 3256 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3257 block sparse compressed row format. 3258 3259 Options Database Keys: 3260 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3261 3262 Level: beginner 3263 3264 .seealso: MatCreateSeqBAIJ() 3265 M*/ 3266 3267 PETSC_EXTERN_C PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatCreate_SeqBAIJ" 3271 PETSC_EXTERN_C PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3272 { 3273 PetscErrorCode ierr; 3274 PetscMPIInt size; 3275 Mat_SeqBAIJ *b; 3276 3277 PetscFunctionBegin; 3278 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3279 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3280 3281 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3282 B->data = (void*)b; 3283 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3284 3285 b->row = 0; 3286 b->col = 0; 3287 b->icol = 0; 3288 b->reallocs = 0; 3289 b->saved_values = 0; 3290 3291 b->roworiented = PETSC_TRUE; 3292 b->nonew = 0; 3293 b->diag = 0; 3294 b->solve_work = 0; 3295 b->mult_work = 0; 3296 B->spptr = 0; 3297 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3298 b->keepnonzeropattern = PETSC_FALSE; 3299 b->xtoy = 0; 3300 b->XtoY = 0; 3301 B->same_nonzero = PETSC_FALSE; 3302 3303 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3304 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3305 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3306 #if defined(PETSC_HAVE_MUMPS) 3307 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3308 #endif 3309 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3310 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3311 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3312 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3313 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3314 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3315 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3316 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3317 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3318 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3319 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3320 PetscFunctionReturn(0); 3321 } 3322 3323 #undef __FUNCT__ 3324 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3325 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3326 { 3327 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3328 PetscErrorCode ierr; 3329 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3330 3331 PetscFunctionBegin; 3332 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3333 3334 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3335 c->imax = a->imax; 3336 c->ilen = a->ilen; 3337 c->free_imax_ilen = PETSC_FALSE; 3338 } else { 3339 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3340 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3341 for (i=0; i<mbs; i++) { 3342 c->imax[i] = a->imax[i]; 3343 c->ilen[i] = a->ilen[i]; 3344 } 3345 c->free_imax_ilen = PETSC_TRUE; 3346 } 3347 3348 /* allocate the matrix space */ 3349 if (mallocmatspace) { 3350 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3351 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3352 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3353 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3354 3355 c->i = a->i; 3356 c->j = a->j; 3357 c->singlemalloc = PETSC_FALSE; 3358 c->free_a = PETSC_TRUE; 3359 c->free_ij = PETSC_FALSE; 3360 c->parent = A; 3361 C->preallocated = PETSC_TRUE; 3362 C->assembled = PETSC_TRUE; 3363 3364 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3365 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3366 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3367 } else { 3368 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3369 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3370 3371 c->singlemalloc = PETSC_TRUE; 3372 c->free_a = PETSC_TRUE; 3373 c->free_ij = PETSC_TRUE; 3374 3375 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3376 if (mbs > 0) { 3377 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3378 if (cpvalues == MAT_COPY_VALUES) { 3379 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3380 } else { 3381 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3382 } 3383 } 3384 C->preallocated = PETSC_TRUE; 3385 C->assembled = PETSC_TRUE; 3386 } 3387 } 3388 3389 c->roworiented = a->roworiented; 3390 c->nonew = a->nonew; 3391 3392 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3393 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3394 3395 c->bs2 = a->bs2; 3396 c->mbs = a->mbs; 3397 c->nbs = a->nbs; 3398 3399 if (a->diag) { 3400 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3401 c->diag = a->diag; 3402 c->free_diag = PETSC_FALSE; 3403 } else { 3404 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3405 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3406 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3407 c->free_diag = PETSC_TRUE; 3408 } 3409 } else c->diag = 0; 3410 3411 c->nz = a->nz; 3412 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3413 c->solve_work = 0; 3414 c->mult_work = 0; 3415 3416 c->compressedrow.use = a->compressedrow.use; 3417 c->compressedrow.nrows = a->compressedrow.nrows; 3418 c->compressedrow.check = a->compressedrow.check; 3419 if (a->compressedrow.use) { 3420 i = a->compressedrow.nrows; 3421 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3422 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3423 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3424 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3425 } else { 3426 c->compressedrow.use = PETSC_FALSE; 3427 c->compressedrow.i = NULL; 3428 c->compressedrow.rindex = NULL; 3429 } 3430 C->same_nonzero = A->same_nonzero; 3431 3432 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3433 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3434 PetscFunctionReturn(0); 3435 } 3436 3437 #undef __FUNCT__ 3438 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3439 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3440 { 3441 PetscErrorCode ierr; 3442 3443 PetscFunctionBegin; 3444 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3445 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3446 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3447 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3448 PetscFunctionReturn(0); 3449 } 3450 3451 #undef __FUNCT__ 3452 #define __FUNCT__ "MatLoad_SeqBAIJ" 3453 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3454 { 3455 Mat_SeqBAIJ *a; 3456 PetscErrorCode ierr; 3457 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3458 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3459 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3460 PetscInt *masked,nmask,tmp,bs2,ishift; 3461 PetscMPIInt size; 3462 int fd; 3463 PetscScalar *aa; 3464 MPI_Comm comm; 3465 3466 PetscFunctionBegin; 3467 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3468 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3469 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3470 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3471 bs2 = bs*bs; 3472 3473 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3474 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3475 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3476 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3477 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3478 M = header[1]; N = header[2]; nz = header[3]; 3479 3480 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3481 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3482 3483 /* 3484 This code adds extra rows to make sure the number of rows is 3485 divisible by the blocksize 3486 */ 3487 mbs = M/bs; 3488 extra_rows = bs - M + bs*(mbs); 3489 if (extra_rows == bs) extra_rows = 0; 3490 else mbs++; 3491 if (extra_rows) { 3492 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3493 } 3494 3495 /* Set global sizes if not already set */ 3496 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3497 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3498 } else { /* Check if the matrix global sizes are correct */ 3499 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3500 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3501 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3502 } 3503 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3504 } 3505 3506 /* read in row lengths */ 3507 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3508 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3509 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3510 3511 /* read in column indices */ 3512 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3513 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3514 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3515 3516 /* loop over row lengths determining block row lengths */ 3517 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3518 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3519 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3520 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3521 rowcount = 0; 3522 nzcount = 0; 3523 for (i=0; i<mbs; i++) { 3524 nmask = 0; 3525 for (j=0; j<bs; j++) { 3526 kmax = rowlengths[rowcount]; 3527 for (k=0; k<kmax; k++) { 3528 tmp = jj[nzcount++]/bs; 3529 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3530 } 3531 rowcount++; 3532 } 3533 browlengths[i] += nmask; 3534 /* zero out the mask elements we set */ 3535 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3536 } 3537 3538 /* Do preallocation */ 3539 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3540 a = (Mat_SeqBAIJ*)newmat->data; 3541 3542 /* set matrix "i" values */ 3543 a->i[0] = 0; 3544 for (i=1; i<= mbs; i++) { 3545 a->i[i] = a->i[i-1] + browlengths[i-1]; 3546 a->ilen[i-1] = browlengths[i-1]; 3547 } 3548 a->nz = 0; 3549 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3550 3551 /* read in nonzero values */ 3552 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3553 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3554 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3555 3556 /* set "a" and "j" values into matrix */ 3557 nzcount = 0; jcount = 0; 3558 for (i=0; i<mbs; i++) { 3559 nzcountb = nzcount; 3560 nmask = 0; 3561 for (j=0; j<bs; j++) { 3562 kmax = rowlengths[i*bs+j]; 3563 for (k=0; k<kmax; k++) { 3564 tmp = jj[nzcount++]/bs; 3565 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3566 } 3567 } 3568 /* sort the masked values */ 3569 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3570 3571 /* set "j" values into matrix */ 3572 maskcount = 1; 3573 for (j=0; j<nmask; j++) { 3574 a->j[jcount++] = masked[j]; 3575 mask[masked[j]] = maskcount++; 3576 } 3577 /* set "a" values into matrix */ 3578 ishift = bs2*a->i[i]; 3579 for (j=0; j<bs; j++) { 3580 kmax = rowlengths[i*bs+j]; 3581 for (k=0; k<kmax; k++) { 3582 tmp = jj[nzcountb]/bs; 3583 block = mask[tmp] - 1; 3584 point = jj[nzcountb] - bs*tmp; 3585 idx = ishift + bs2*block + j + bs*point; 3586 a->a[idx] = (MatScalar)aa[nzcountb++]; 3587 } 3588 } 3589 /* zero out the mask elements we set */ 3590 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3591 } 3592 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3593 3594 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3595 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3596 ierr = PetscFree(aa);CHKERRQ(ierr); 3597 ierr = PetscFree(jj);CHKERRQ(ierr); 3598 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3599 3600 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3601 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3602 PetscFunctionReturn(0); 3603 } 3604 3605 #undef __FUNCT__ 3606 #define __FUNCT__ "MatCreateSeqBAIJ" 3607 /*@C 3608 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3609 compressed row) format. For good matrix assembly performance the 3610 user should preallocate the matrix storage by setting the parameter nz 3611 (or the array nnz). By setting these parameters accurately, performance 3612 during matrix assembly can be increased by more than a factor of 50. 3613 3614 Collective on MPI_Comm 3615 3616 Input Parameters: 3617 + comm - MPI communicator, set to PETSC_COMM_SELF 3618 . bs - size of block 3619 . m - number of rows 3620 . n - number of columns 3621 . nz - number of nonzero blocks per block row (same for all rows) 3622 - nnz - array containing the number of nonzero blocks in the various block rows 3623 (possibly different for each block row) or NULL 3624 3625 Output Parameter: 3626 . A - the matrix 3627 3628 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3629 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3630 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3631 3632 Options Database Keys: 3633 . -mat_no_unroll - uses code that does not unroll the loops in the 3634 block calculations (much slower) 3635 . -mat_block_size - size of the blocks to use 3636 3637 Level: intermediate 3638 3639 Notes: 3640 The number of rows and columns must be divisible by blocksize. 3641 3642 If the nnz parameter is given then the nz parameter is ignored 3643 3644 A nonzero block is any block that as 1 or more nonzeros in it 3645 3646 The block AIJ format is fully compatible with standard Fortran 77 3647 storage. That is, the stored row and column indices can begin at 3648 either one (as in Fortran) or zero. See the users' manual for details. 3649 3650 Specify the preallocated storage with either nz or nnz (not both). 3651 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3652 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3653 matrices. 3654 3655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3656 @*/ 3657 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3658 { 3659 PetscErrorCode ierr; 3660 3661 PetscFunctionBegin; 3662 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3663 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3664 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3665 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3666 PetscFunctionReturn(0); 3667 } 3668 3669 #undef __FUNCT__ 3670 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3671 /*@C 3672 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3673 per row in the matrix. For good matrix assembly performance the 3674 user should preallocate the matrix storage by setting the parameter nz 3675 (or the array nnz). By setting these parameters accurately, performance 3676 during matrix assembly can be increased by more than a factor of 50. 3677 3678 Collective on MPI_Comm 3679 3680 Input Parameters: 3681 + A - the matrix 3682 . bs - size of block 3683 . nz - number of block nonzeros per block row (same for all rows) 3684 - nnz - array containing the number of block nonzeros in the various block rows 3685 (possibly different for each block row) or NULL 3686 3687 Options Database Keys: 3688 . -mat_no_unroll - uses code that does not unroll the loops in the 3689 block calculations (much slower) 3690 . -mat_block_size - size of the blocks to use 3691 3692 Level: intermediate 3693 3694 Notes: 3695 If the nnz parameter is given then the nz parameter is ignored 3696 3697 You can call MatGetInfo() to get information on how effective the preallocation was; 3698 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3699 You can also run with the option -info and look for messages with the string 3700 malloc in them to see if additional memory allocation was needed. 3701 3702 The block AIJ format is fully compatible with standard Fortran 77 3703 storage. That is, the stored row and column indices can begin at 3704 either one (as in Fortran) or zero. See the users' manual for details. 3705 3706 Specify the preallocated storage with either nz or nnz (not both). 3707 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3708 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3709 3710 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3711 @*/ 3712 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3713 { 3714 PetscErrorCode ierr; 3715 3716 PetscFunctionBegin; 3717 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3718 PetscValidType(B,1); 3719 PetscValidLogicalCollectiveInt(B,bs,2); 3720 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3721 PetscFunctionReturn(0); 3722 } 3723 3724 #undef __FUNCT__ 3725 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3726 /*@C 3727 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3728 (the default sequential PETSc format). 3729 3730 Collective on MPI_Comm 3731 3732 Input Parameters: 3733 + A - the matrix 3734 . i - the indices into j for the start of each local row (starts with zero) 3735 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3736 - v - optional values in the matrix 3737 3738 Level: developer 3739 3740 .keywords: matrix, aij, compressed row, sparse 3741 3742 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3743 @*/ 3744 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3745 { 3746 PetscErrorCode ierr; 3747 3748 PetscFunctionBegin; 3749 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3750 PetscValidType(B,1); 3751 PetscValidLogicalCollectiveInt(B,bs,2); 3752 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3753 PetscFunctionReturn(0); 3754 } 3755 3756 3757 #undef __FUNCT__ 3758 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3759 /*@ 3760 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3761 3762 Collective on MPI_Comm 3763 3764 Input Parameters: 3765 + comm - must be an MPI communicator of size 1 3766 . bs - size of block 3767 . m - number of rows 3768 . n - number of columns 3769 . i - row indices 3770 . j - column indices 3771 - a - matrix values 3772 3773 Output Parameter: 3774 . mat - the matrix 3775 3776 Level: advanced 3777 3778 Notes: 3779 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3780 once the matrix is destroyed 3781 3782 You cannot set new nonzero locations into this matrix, that will generate an error. 3783 3784 The i and j indices are 0 based 3785 3786 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3787 3788 3789 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3790 3791 @*/ 3792 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3793 { 3794 PetscErrorCode ierr; 3795 PetscInt ii; 3796 Mat_SeqBAIJ *baij; 3797 3798 PetscFunctionBegin; 3799 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3800 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3801 3802 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3803 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3804 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3805 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3806 baij = (Mat_SeqBAIJ*)(*mat)->data; 3807 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3808 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3809 3810 baij->i = i; 3811 baij->j = j; 3812 baij->a = a; 3813 3814 baij->singlemalloc = PETSC_FALSE; 3815 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3816 baij->free_a = PETSC_FALSE; 3817 baij->free_ij = PETSC_FALSE; 3818 3819 for (ii=0; ii<m; ii++) { 3820 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3821 #if defined(PETSC_USE_DEBUG) 3822 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3823 #endif 3824 } 3825 #if defined(PETSC_USE_DEBUG) 3826 for (ii=0; ii<baij->i[m]; ii++) { 3827 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3828 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3829 } 3830 #endif 3831 3832 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3834 PetscFunctionReturn(0); 3835 } 3836