1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs){ 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,s1,s2; 224 const PetscScalar *b; 225 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 232 its = its*lits; 233 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 250 i2 = 2; 251 idiag += 4; 252 for (i=1; i<m; i++) { 253 v = aa + 4*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; 257 for (j=0; j<nz; j++) { 258 idx = 2*vi[j]; 259 it = 4*j; 260 x1 = x[idx]; x2 = x[1+idx]; 261 s1 -= v[it]*x1 + v[it+2]*x2; 262 s2 -= v[it+1]*x1 + v[it+3]*x2; 263 } 264 x[i2] = idiag[0]*s1 + idiag[2]*s2; 265 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 266 idiag += 4; 267 i2 += 2; 268 } 269 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 270 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 271 } 272 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 273 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 274 i2 = 0; 275 mdiag = a->idiag+4*a->mbs; 276 for (i=0; i<m; i++) { 277 x1 = x[i2]; x2 = x[i2+1]; 278 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 279 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 280 mdiag += 4; 281 i2 += 2; 282 } 283 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 284 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 286 } 287 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 288 idiag = a->idiag+4*a->mbs - 4; 289 i2 = 2*m - 2; 290 x1 = x[i2]; x2 = x[i2+1]; 291 x[i2] = idiag[0]*x1 + idiag[2]*x2; 292 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 293 idiag -= 4; 294 i2 -= 2; 295 for (i=m-2; i>=0; i--) { 296 v = aa + 4*(diag[i]+1); 297 vi = aj + diag[i] + 1; 298 nz = ai[i+1] - diag[i] - 1; 299 s1 = x[i2]; s2 = x[i2+1]; 300 for (j=0; j<nz; j++) { 301 idx = 2*vi[j]; 302 it = 4*j; 303 x1 = x[idx]; x2 = x[1+idx]; 304 s1 -= v[it]*x1 + v[it+2]*x2; 305 s2 -= v[it+1]*x1 + v[it+3]*x2; 306 } 307 x[i2] = idiag[0]*s1 + idiag[2]*s2; 308 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 309 idiag -= 4; 310 i2 -= 2; 311 } 312 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 313 } 314 } else { 315 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 316 } 317 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 PetscScalar *x,x1,x2,x3,s1,s2,s3; 328 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 329 const PetscScalar *b; 330 PetscErrorCode ierr; 331 PetscInt m = a->mbs,i,i2,nz,idx; 332 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 333 334 PetscFunctionBegin; 335 its = its*lits; 336 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 337 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 338 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 339 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 340 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 341 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 342 343 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 344 345 diag = a->diag; 346 idiag = a->idiag; 347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 349 350 if (flag & SOR_ZERO_INITIAL_GUESS) { 351 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 352 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 353 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 354 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 355 i2 = 3; 356 idiag += 9; 357 for (i=1; i<m; i++) { 358 v = aa + 9*ai[i]; 359 vi = aj + ai[i]; 360 nz = diag[i] - ai[i]; 361 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 362 while (nz--) { 363 idx = 3*(*vi++); 364 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 365 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 366 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 367 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 368 v += 9; 369 } 370 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 371 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 372 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 373 idiag += 9; 374 i2 += 3; 375 } 376 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 377 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 378 } 379 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 380 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 381 i2 = 0; 382 mdiag = a->idiag+9*a->mbs; 383 for (i=0; i<m; i++) { 384 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 385 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 386 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 387 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 388 mdiag += 9; 389 i2 += 3; 390 } 391 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 392 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 393 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 394 } 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 396 idiag = a->idiag+9*a->mbs - 9; 397 i2 = 3*m - 3; 398 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 399 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 400 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 401 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 402 idiag -= 9; 403 i2 -= 3; 404 for (i=m-2; i>=0; i--) { 405 v = aa + 9*(diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 409 while (nz--) { 410 idx = 3*(*vi++); 411 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 412 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 413 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 414 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 415 v += 9; 416 } 417 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 418 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 419 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 420 idiag -= 9; 421 i2 -= 3; 422 } 423 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 424 } 425 } else { 426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 427 } 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 439 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 440 const PetscScalar *b; 441 PetscErrorCode ierr; 442 PetscInt m = a->mbs,i,i2,nz,idx; 443 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 444 445 PetscFunctionBegin; 446 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 447 its = its*lits; 448 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 449 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 450 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 451 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 452 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 453 454 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 455 456 diag = a->diag; 457 idiag = a->idiag; 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 460 461 if (flag & SOR_ZERO_INITIAL_GUESS) { 462 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 463 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 464 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 465 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 466 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 467 i2 = 4; 468 idiag += 16; 469 for (i=1; i<m; i++) { 470 v = aa + 16*ai[i]; 471 vi = aj + ai[i]; 472 nz = diag[i] - ai[i]; 473 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 474 while (nz--) { 475 idx = 4*(*vi++); 476 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 477 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 478 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 479 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 480 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 481 v += 16; 482 } 483 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 484 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 485 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 486 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 487 idiag += 16; 488 i2 += 4; 489 } 490 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 491 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 492 } 493 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 494 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 495 i2 = 0; 496 mdiag = a->idiag+16*a->mbs; 497 for (i=0; i<m; i++) { 498 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 499 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 500 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 501 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 502 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 503 mdiag += 16; 504 i2 += 4; 505 } 506 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 507 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 508 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 509 } 510 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 511 idiag = a->idiag+16*a->mbs - 16; 512 i2 = 4*m - 4; 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 515 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 516 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 517 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 518 idiag -= 16; 519 i2 -= 4; 520 for (i=m-2; i>=0; i--) { 521 v = aa + 16*(diag[i]+1); 522 vi = aj + diag[i] + 1; 523 nz = ai[i+1] - diag[i] - 1; 524 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 525 while (nz--) { 526 idx = 4*(*vi++); 527 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 528 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 529 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 530 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 531 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 532 v += 16; 533 } 534 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 535 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 536 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 537 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 538 idiag -= 16; 539 i2 -= 4; 540 } 541 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 542 } 543 } else { 544 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 545 } 546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 547 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 554 { 555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 556 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 557 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 558 const PetscScalar *b; 559 PetscErrorCode ierr; 560 PetscInt m = a->mbs,i,i2,nz,idx; 561 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 562 563 PetscFunctionBegin; 564 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 565 its = its*lits; 566 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 567 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 568 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 569 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 570 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 571 572 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 573 574 diag = a->diag; 575 idiag = a->idiag; 576 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 578 579 if (flag & SOR_ZERO_INITIAL_GUESS) { 580 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 581 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 582 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 583 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 584 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 585 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 586 i2 = 5; 587 idiag += 25; 588 for (i=1; i<m; i++) { 589 v = aa + 25*ai[i]; 590 vi = aj + ai[i]; 591 nz = diag[i] - ai[i]; 592 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 593 while (nz--) { 594 idx = 5*(*vi++); 595 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 596 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 597 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 598 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 599 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 600 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 601 v += 25; 602 } 603 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 604 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 605 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 606 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 607 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 608 idiag += 25; 609 i2 += 5; 610 } 611 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 612 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 613 } 614 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 615 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 616 i2 = 0; 617 mdiag = a->idiag+25*a->mbs; 618 for (i=0; i<m; i++) { 619 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 620 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 621 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 622 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 623 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 624 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 625 mdiag += 25; 626 i2 += 5; 627 } 628 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 629 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 630 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 631 } 632 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 633 idiag = a->idiag+25*a->mbs - 25; 634 i2 = 5*m - 5; 635 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 636 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 637 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 638 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 639 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 640 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 641 idiag -= 25; 642 i2 -= 5; 643 for (i=m-2; i>=0; i--) { 644 v = aa + 25*(diag[i]+1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i+1] - diag[i] - 1; 647 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 651 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 652 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 653 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 654 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 655 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 656 v += 25; 657 } 658 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 659 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 660 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 661 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 662 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 663 idiag -= 25; 664 i2 -= 5; 665 } 666 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 670 } 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 679 { 680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 681 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 682 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 683 const PetscScalar *b; 684 PetscErrorCode ierr; 685 PetscInt m = a->mbs,i,i2,nz,idx; 686 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 687 688 PetscFunctionBegin; 689 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 690 its = its*lits; 691 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 692 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 693 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 694 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 695 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 696 697 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 698 699 diag = a->diag; 700 idiag = a->idiag; 701 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 703 704 if (flag & SOR_ZERO_INITIAL_GUESS) { 705 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 706 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 707 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 708 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 709 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 710 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 711 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 712 i2 = 6; 713 idiag += 36; 714 for (i=1; i<m; i++) { 715 v = aa + 36*ai[i]; 716 vi = aj + ai[i]; 717 nz = diag[i] - ai[i]; 718 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 719 while (nz--) { 720 idx = 6*(*vi++); 721 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 722 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 723 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 724 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 725 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 726 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 727 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 728 v += 36; 729 } 730 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 731 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 732 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 733 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 734 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 735 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 736 idiag += 36; 737 i2 += 6; 738 } 739 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 740 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 741 } 742 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 743 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 744 i2 = 0; 745 mdiag = a->idiag+36*a->mbs; 746 for (i=0; i<m; i++) { 747 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 748 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 749 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 750 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 751 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 752 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 753 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 754 mdiag += 36; 755 i2 += 6; 756 } 757 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 758 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 759 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 760 } 761 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 762 idiag = a->idiag+36*a->mbs - 36; 763 i2 = 6*m - 6; 764 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 765 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 766 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 767 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 768 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 769 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 770 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 771 idiag -= 36; 772 i2 -= 6; 773 for (i=m-2; i>=0; i--) { 774 v = aa + 36*(diag[i]+1); 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 778 while (nz--) { 779 idx = 6*(*vi++); 780 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 781 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 782 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 783 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 784 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 785 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 786 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 787 v += 36; 788 } 789 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 790 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 791 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 792 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 793 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 794 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 795 idiag -= 36; 796 i2 -= 6; 797 } 798 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 799 } 800 } else { 801 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 802 } 803 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 813 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 814 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 815 const PetscScalar *b; 816 PetscErrorCode ierr; 817 PetscInt m = a->mbs,i,i2,nz,idx; 818 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 819 820 PetscFunctionBegin; 821 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 822 its = its*lits; 823 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 824 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 825 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 826 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 827 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 828 829 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 830 831 diag = a->diag; 832 idiag = a->idiag; 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 835 836 if (flag & SOR_ZERO_INITIAL_GUESS) { 837 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 838 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 839 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 840 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 841 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 842 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 843 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 844 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 845 i2 = 7; 846 idiag += 49; 847 for (i=1; i<m; i++) { 848 v = aa + 49*ai[i]; 849 vi = aj + ai[i]; 850 nz = diag[i] - ai[i]; 851 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 852 while (nz--) { 853 idx = 7*(*vi++); 854 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 855 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 856 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 857 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 858 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 859 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 860 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 861 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 862 v += 49; 863 } 864 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 865 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 866 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 867 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 868 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 869 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 870 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 871 idiag += 49; 872 i2 += 7; 873 } 874 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 875 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 876 } 877 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 878 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 879 i2 = 0; 880 mdiag = a->idiag+49*a->mbs; 881 for (i=0; i<m; i++) { 882 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 883 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 884 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 885 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 886 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 887 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 888 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 889 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 890 mdiag += 49; 891 i2 += 7; 892 } 893 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 894 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 895 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 896 } 897 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 898 idiag = a->idiag+49*a->mbs - 49; 899 i2 = 7*m - 7; 900 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 901 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 902 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 903 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 904 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 905 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 906 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 907 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 908 idiag -= 49; 909 i2 -= 7; 910 for (i=m-2; i>=0; i--) { 911 v = aa + 49*(diag[i]+1); 912 vi = aj + diag[i] + 1; 913 nz = ai[i+1] - diag[i] - 1; 914 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 918 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 919 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 920 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 921 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 922 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 923 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 924 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 925 v += 49; 926 } 927 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 928 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 929 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 930 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 931 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 932 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 933 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 934 idiag -= 49; 935 i2 -= 7; 936 } 937 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 938 } 939 } else { 940 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 941 } 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 950 { 951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 952 PetscScalar *x,*work,*w,*workt; 953 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 954 const PetscScalar *b; 955 PetscErrorCode ierr; 956 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 957 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 958 959 PetscFunctionBegin; 960 its = its*lits; 961 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 962 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 963 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 964 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 965 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 966 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 967 968 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 969 970 diag = a->diag; 971 idiag = a->idiag; 972 if (!a->mult_work) { 973 k = PetscMax(A->rmap->n,A->cmap->n); 974 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 975 } 976 work = a->mult_work; 977 if (!a->sor_work) { 978 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 979 } 980 w = a->sor_work; 981 982 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 984 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 987 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 988 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 989 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 990 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 991 i2 = bs; 992 idiag += bs2; 993 for (i=1; i<m; i++) { 994 v = aa + bs2*ai[i]; 995 vi = aj + ai[i]; 996 nz = diag[i] - ai[i]; 997 998 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 999 /* copy all rows of x that are needed into contiguous space */ 1000 workt = work; 1001 for (j=0; j<nz; j++) { 1002 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1003 workt += bs; 1004 } 1005 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1006 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1007 while (nz--) { 1008 idx = N*(*vi++); 1009 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1010 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1011 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1012 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1013 v += N2; 1014 } */ 1015 1016 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1017 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1018 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1019 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1020 1021 idiag += bs2; 1022 i2 += bs; 1023 } 1024 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1025 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1026 } 1027 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1028 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1029 i2 = 0; 1030 mdiag = a->idiag+bs2*a->mbs; 1031 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 for (i=0; i<m; i++) { 1033 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1034 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1035 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1036 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1037 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1038 1039 mdiag += bs2; 1040 i2 += bs; 1041 } 1042 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1043 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1044 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1047 idiag = a->idiag+bs2*a->mbs - bs2; 1048 i2 = bs*m - bs; 1049 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1050 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1051 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1052 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1053 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1054 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1055 idiag -= bs2; 1056 i2 -= bs; 1057 for (i=m-2; i>=0; i--) { 1058 v = aa + bs2*(diag[i]+1); 1059 vi = aj + diag[i] + 1; 1060 nz = ai[i+1] - diag[i] - 1; 1061 1062 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 /* copy all rows of x that are needed into contiguous space */ 1064 workt = work; 1065 for (j=0; j<nz; j++) { 1066 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1067 workt += bs; 1068 } 1069 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1070 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1071 while (nz--) { 1072 idx = N*(*vi++); 1073 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1074 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1075 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1076 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1077 v += N2; 1078 } */ 1079 1080 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1081 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1082 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1083 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1084 idiag -= bs2; 1085 i2 -= bs; 1086 } 1087 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1088 } 1089 } else { 1090 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1091 } 1092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 /* 1098 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1099 */ 1100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1103 #define matsetvaluesblocked4_ matsetvaluesblocked4 1104 #endif 1105 1106 EXTERN_C_BEGIN 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "matsetvaluesblocked4_" 1109 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1110 { 1111 Mat A = *AA; 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1114 PetscInt *ai=a->i,*ailen=a->ilen; 1115 PetscInt *aj=a->j,stepval,lastcol = -1; 1116 const PetscScalar *value = v; 1117 MatScalar *ap,*aa = a->a,*bap; 1118 1119 PetscFunctionBegin; 1120 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1121 stepval = (n-1)*4; 1122 for (k=0; k<m; k++) { /* loop over added rows */ 1123 row = im[k]; 1124 rp = aj + ai[row]; 1125 ap = aa + 16*ai[row]; 1126 nrow = ailen[row]; 1127 low = 0; 1128 high = nrow; 1129 for (l=0; l<n; l++) { /* loop over added columns */ 1130 col = in[l]; 1131 if (col <= lastcol) low = 0; else high = nrow; 1132 lastcol = col; 1133 value = v + k*(stepval+4 + l)*4; 1134 while (high-low > 7) { 1135 t = (low+high)/2; 1136 if (rp[t] > col) high = t; 1137 else low = t; 1138 } 1139 for (i=low; i<high; i++) { 1140 if (rp[i] > col) break; 1141 if (rp[i] == col) { 1142 bap = ap + 16*i; 1143 for (ii=0; ii<4; ii++,value+=stepval) { 1144 for (jj=ii; jj<16; jj+=4) { 1145 bap[jj] += *value++; 1146 } 1147 } 1148 goto noinsert2; 1149 } 1150 } 1151 N = nrow++ - 1; 1152 high++; /* added new column index thus must search to one higher than before */ 1153 /* shift up all the later entries in this row */ 1154 for (ii=N; ii>=i; ii--) { 1155 rp[ii+1] = rp[ii]; 1156 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1157 } 1158 if (N >= i) { 1159 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1160 } 1161 rp[i] = col; 1162 bap = ap + 16*i; 1163 for (ii=0; ii<4; ii++,value+=stepval) { 1164 for (jj=ii; jj<16; jj+=4) { 1165 bap[jj] = *value++; 1166 } 1167 } 1168 noinsert2:; 1169 low = i; 1170 } 1171 ailen[row] = nrow; 1172 } 1173 PetscFunctionReturnVoid(); 1174 } 1175 EXTERN_C_END 1176 1177 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1178 #define matsetvalues4_ MATSETVALUES4 1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1180 #define matsetvalues4_ matsetvalues4 1181 #endif 1182 1183 EXTERN_C_BEGIN 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatSetValues4_" 1186 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1187 { 1188 Mat A = *AA; 1189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1190 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1191 PetscInt *ai=a->i,*ailen=a->ilen; 1192 PetscInt *aj=a->j,brow,bcol; 1193 PetscInt ridx,cidx,lastcol = -1; 1194 MatScalar *ap,value,*aa=a->a,*bap; 1195 1196 PetscFunctionBegin; 1197 for (k=0; k<m; k++) { /* loop over added rows */ 1198 row = im[k]; brow = row/4; 1199 rp = aj + ai[brow]; 1200 ap = aa + 16*ai[brow]; 1201 nrow = ailen[brow]; 1202 low = 0; 1203 high = nrow; 1204 for (l=0; l<n; l++) { /* loop over added columns */ 1205 col = in[l]; bcol = col/4; 1206 ridx = row % 4; cidx = col % 4; 1207 value = v[l + k*n]; 1208 if (col <= lastcol) low = 0; else high = nrow; 1209 lastcol = col; 1210 while (high-low > 7) { 1211 t = (low+high)/2; 1212 if (rp[t] > bcol) high = t; 1213 else low = t; 1214 } 1215 for (i=low; i<high; i++) { 1216 if (rp[i] > bcol) break; 1217 if (rp[i] == bcol) { 1218 bap = ap + 16*i + 4*cidx + ridx; 1219 *bap += value; 1220 goto noinsert1; 1221 } 1222 } 1223 N = nrow++ - 1; 1224 high++; /* added new column thus must search to one higher than before */ 1225 /* shift up all the later entries in this row */ 1226 for (ii=N; ii>=i; ii--) { 1227 rp[ii+1] = rp[ii]; 1228 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1229 } 1230 if (N>=i) { 1231 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1232 } 1233 rp[i] = bcol; 1234 ap[16*i + 4*cidx + ridx] = value; 1235 noinsert1:; 1236 low = i; 1237 } 1238 ailen[brow] = nrow; 1239 } 1240 PetscFunctionReturnVoid(); 1241 } 1242 EXTERN_C_END 1243 1244 /* 1245 Checks for missing diagonals 1246 */ 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 PetscInt *diag,*jj = a->j,i; 1254 1255 PetscFunctionBegin; 1256 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1257 *missing = PETSC_FALSE; 1258 if (A->rmap->n > 0 && !jj) { 1259 *missing = PETSC_TRUE; 1260 if (d) *d = 0; 1261 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1262 } else { 1263 diag = a->diag; 1264 for (i=0; i<a->mbs; i++) { 1265 if (jj[diag[i]] != i) { 1266 *missing = PETSC_TRUE; 1267 if (d) *d = i; 1268 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1269 break; 1270 } 1271 } 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 PetscInt i,j,m = a->mbs; 1283 1284 PetscFunctionBegin; 1285 if (!a->diag) { 1286 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1287 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1288 a->free_diag = PETSC_TRUE; 1289 } 1290 for (i=0; i<m; i++) { 1291 a->diag[i] = a->i[i+1]; 1292 for (j=a->i[i]; j<a->i[i+1]; j++) { 1293 if (a->j[j] == i) { 1294 a->diag[i] = j; 1295 break; 1296 } 1297 } 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 1303 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1312 PetscInt *tia, *tja; 1313 1314 PetscFunctionBegin; 1315 *nn = n; 1316 if (!ia) PetscFunctionReturn(0); 1317 if (symmetric) { 1318 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1319 nz = tia[n]; 1320 } else { 1321 tia = a->i; tja = a->j; 1322 } 1323 1324 if (!blockcompressed && bs > 1) { 1325 (*nn) *= bs; 1326 /* malloc & create the natural set of indices */ 1327 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1328 if (n) { 1329 (*ia)[0] = 0; 1330 for (j=1; j<bs; j++) { 1331 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1332 } 1333 } 1334 1335 for (i=1; i<n; i++) { 1336 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1337 for (j=1; j<bs; j++) { 1338 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1339 } 1340 } 1341 if (n) { 1342 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1343 } 1344 1345 if (ja) { 1346 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1347 cnt = 0; 1348 for (i=0; i<n; i++) { 1349 for (j=0; j<bs; j++) { 1350 for (k=tia[i]; k<tia[i+1]; k++) { 1351 for (l=0; l<bs; l++) { 1352 (*ja)[cnt++] = bs*tja[k] + l; 1353 } 1354 } 1355 } 1356 } 1357 } 1358 1359 n *= bs; 1360 nz *= bs*bs; 1361 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1362 ierr = PetscFree(tia);CHKERRQ(ierr); 1363 ierr = PetscFree(tja);CHKERRQ(ierr); 1364 } 1365 } else if (oshift == 1) { 1366 if (symmetric) { 1367 PetscInt nz = tia[A->rmap->n/bs]; 1368 /* add 1 to i and j indices */ 1369 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1370 *ia = tia; 1371 if (ja) { 1372 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1373 *ja = tja; 1374 } 1375 } else { 1376 PetscInt nz = a->i[A->rmap->n/bs]; 1377 /* malloc space and add 1 to i and j indices */ 1378 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1379 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1380 if (ja) { 1381 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1382 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1383 } 1384 } 1385 } else { 1386 *ia = tia; 1387 if (ja) *ja = tja; 1388 } 1389 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1395 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1396 { 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 if (!ia) PetscFunctionReturn(0); 1401 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1402 ierr = PetscFree(*ia);CHKERRQ(ierr); 1403 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1410 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1411 { 1412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 #if defined(PETSC_USE_LOG) 1417 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1418 #endif 1419 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1420 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1421 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1422 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1423 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1424 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1425 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1426 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1427 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1428 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1429 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1430 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1431 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1432 1433 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1434 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1435 ierr = PetscFree(A->data);CHKERRQ(ierr); 1436 1437 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1443 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1444 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1445 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1446 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1447 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1453 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1454 { 1455 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1456 PetscErrorCode ierr; 1457 1458 PetscFunctionBegin; 1459 switch (op) { 1460 case MAT_ROW_ORIENTED: 1461 a->roworiented = flg; 1462 break; 1463 case MAT_KEEP_NONZERO_PATTERN: 1464 a->keepnonzeropattern = flg; 1465 break; 1466 case MAT_NEW_NONZERO_LOCATIONS: 1467 a->nonew = (flg ? 0 : 1); 1468 break; 1469 case MAT_NEW_NONZERO_LOCATION_ERR: 1470 a->nonew = (flg ? -1 : 0); 1471 break; 1472 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1473 a->nonew = (flg ? -2 : 0); 1474 break; 1475 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1476 a->nounused = (flg ? -1 : 0); 1477 break; 1478 case MAT_CHECK_COMPRESSED_ROW: 1479 a->compressedrow.check = flg; 1480 break; 1481 case MAT_NEW_DIAGONALS: 1482 case MAT_IGNORE_OFF_PROC_ENTRIES: 1483 case MAT_USE_HASH_TABLE: 1484 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1485 break; 1486 case MAT_SYMMETRIC: 1487 case MAT_STRUCTURALLY_SYMMETRIC: 1488 case MAT_HERMITIAN: 1489 case MAT_SYMMETRY_ETERNAL: 1490 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1491 break; 1492 default: 1493 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1494 } 1495 PetscFunctionReturn(0); 1496 } 1497 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1500 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1501 { 1502 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1503 PetscErrorCode ierr; 1504 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1505 MatScalar *aa,*aa_i; 1506 PetscScalar *v_i; 1507 1508 PetscFunctionBegin; 1509 bs = A->rmap->bs; 1510 ai = a->i; 1511 aj = a->j; 1512 aa = a->a; 1513 bs2 = a->bs2; 1514 1515 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1516 1517 bn = row/bs; /* Block number */ 1518 bp = row % bs; /* Block Position */ 1519 M = ai[bn+1] - ai[bn]; 1520 *nz = bs*M; 1521 1522 if (v) { 1523 *v = 0; 1524 if (*nz) { 1525 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1526 for (i=0; i<M; i++) { /* for each block in the block row */ 1527 v_i = *v + i*bs; 1528 aa_i = aa + bs2*(ai[bn] + i); 1529 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1530 } 1531 } 1532 } 1533 1534 if (idx) { 1535 *idx = 0; 1536 if (*nz) { 1537 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1538 for (i=0; i<M; i++) { /* for each block in the block row */ 1539 idx_i = *idx + i*bs; 1540 itmp = bs*aj[ai[bn] + i]; 1541 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1542 } 1543 } 1544 } 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1550 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1551 { 1552 PetscErrorCode ierr; 1553 1554 PetscFunctionBegin; 1555 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1556 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1557 PetscFunctionReturn(0); 1558 } 1559 1560 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1564 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1565 { 1566 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1567 Mat C; 1568 PetscErrorCode ierr; 1569 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1570 PetscInt *rows,*cols,bs2=a->bs2; 1571 MatScalar *array; 1572 1573 PetscFunctionBegin; 1574 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1575 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1576 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1577 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1578 1579 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1580 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1581 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1582 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1583 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1584 ierr = PetscFree(col);CHKERRQ(ierr); 1585 } else { 1586 C = *B; 1587 } 1588 1589 array = a->a; 1590 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1591 for (i=0; i<mbs; i++) { 1592 cols[0] = i*bs; 1593 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1594 len = ai[i+1] - ai[i]; 1595 for (j=0; j<len; j++) { 1596 rows[0] = (*aj++)*bs; 1597 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1598 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1599 array += bs2; 1600 } 1601 } 1602 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1603 1604 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1605 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1606 1607 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1608 *B = C; 1609 } else { 1610 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 EXTERN_C_BEGIN 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1618 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1619 { 1620 PetscErrorCode ierr; 1621 Mat Btrans; 1622 1623 PetscFunctionBegin; 1624 *f = PETSC_FALSE; 1625 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1626 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1627 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1628 PetscFunctionReturn(0); 1629 } 1630 EXTERN_C_END 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1634 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1635 { 1636 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1637 PetscErrorCode ierr; 1638 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1639 int fd; 1640 PetscScalar *aa; 1641 FILE *file; 1642 1643 PetscFunctionBegin; 1644 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1645 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1646 col_lens[0] = MAT_FILE_CLASSID; 1647 1648 col_lens[1] = A->rmap->N; 1649 col_lens[2] = A->cmap->n; 1650 col_lens[3] = a->nz*bs2; 1651 1652 /* store lengths of each row and write (including header) to file */ 1653 count = 0; 1654 for (i=0; i<a->mbs; i++) { 1655 for (j=0; j<bs; j++) { 1656 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1657 } 1658 } 1659 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1660 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1661 1662 /* store column indices (zero start index) */ 1663 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1664 count = 0; 1665 for (i=0; i<a->mbs; i++) { 1666 for (j=0; j<bs; j++) { 1667 for (k=a->i[i]; k<a->i[i+1]; k++) { 1668 for (l=0; l<bs; l++) { 1669 jj[count++] = bs*a->j[k] + l; 1670 } 1671 } 1672 } 1673 } 1674 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1675 ierr = PetscFree(jj);CHKERRQ(ierr); 1676 1677 /* store nonzero values */ 1678 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1679 count = 0; 1680 for (i=0; i<a->mbs; i++) { 1681 for (j=0; j<bs; j++) { 1682 for (k=a->i[i]; k<a->i[i+1]; k++) { 1683 for (l=0; l<bs; l++) { 1684 aa[count++] = a->a[bs2*k + l*bs + j]; 1685 } 1686 } 1687 } 1688 } 1689 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1690 ierr = PetscFree(aa);CHKERRQ(ierr); 1691 1692 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1693 if (file) { 1694 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1701 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1702 { 1703 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1704 PetscErrorCode ierr; 1705 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1706 PetscViewerFormat format; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1710 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1711 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1712 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1713 Mat aij; 1714 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1715 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1716 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1717 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1718 PetscFunctionReturn(0); 1719 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1720 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1721 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1722 for (i=0; i<a->mbs; i++) { 1723 for (j=0; j<bs; j++) { 1724 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1725 for (k=a->i[i]; k<a->i[i+1]; k++) { 1726 for (l=0; l<bs; l++) { 1727 #if defined(PETSC_USE_COMPLEX) 1728 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1729 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1730 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1731 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1732 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1733 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1734 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1735 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1736 } 1737 #else 1738 if (a->a[bs2*k + l*bs + j] != 0.0) { 1739 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1740 } 1741 #endif 1742 } 1743 } 1744 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1745 } 1746 } 1747 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1748 } else { 1749 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1750 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1751 for (i=0; i<a->mbs; i++) { 1752 for (j=0; j<bs; j++) { 1753 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1754 for (k=a->i[i]; k<a->i[i+1]; k++) { 1755 for (l=0; l<bs; l++) { 1756 #if defined(PETSC_USE_COMPLEX) 1757 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1758 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1759 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1760 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1761 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1762 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1763 } else { 1764 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1765 } 1766 #else 1767 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1768 #endif 1769 } 1770 } 1771 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1772 } 1773 } 1774 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1775 } 1776 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1782 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1783 { 1784 Mat A = (Mat) Aa; 1785 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1786 PetscErrorCode ierr; 1787 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1788 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1789 MatScalar *aa; 1790 PetscViewer viewer; 1791 PetscViewerFormat format; 1792 1793 PetscFunctionBegin; 1794 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1795 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1796 1797 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1798 1799 /* loop over matrix elements drawing boxes */ 1800 1801 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1802 color = PETSC_DRAW_BLUE; 1803 for (i=0,row=0; i<mbs; i++,row+=bs) { 1804 for (j=a->i[i]; j<a->i[i+1]; j++) { 1805 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1806 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1807 aa = a->a + j*bs2; 1808 for (k=0; k<bs; k++) { 1809 for (l=0; l<bs; l++) { 1810 if (PetscRealPart(*aa++) >= 0.) continue; 1811 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1812 } 1813 } 1814 } 1815 } 1816 color = PETSC_DRAW_CYAN; 1817 for (i=0,row=0; i<mbs; i++,row+=bs) { 1818 for (j=a->i[i]; j<a->i[i+1]; j++) { 1819 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1820 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1821 aa = a->a + j*bs2; 1822 for (k=0; k<bs; k++) { 1823 for (l=0; l<bs; l++) { 1824 if (PetscRealPart(*aa++) != 0.) continue; 1825 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1826 } 1827 } 1828 } 1829 } 1830 color = PETSC_DRAW_RED; 1831 for (i=0,row=0; i<mbs; i++,row+=bs) { 1832 for (j=a->i[i]; j<a->i[i+1]; j++) { 1833 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1834 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1835 aa = a->a + j*bs2; 1836 for (k=0; k<bs; k++) { 1837 for (l=0; l<bs; l++) { 1838 if (PetscRealPart(*aa++) <= 0.) continue; 1839 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1840 } 1841 } 1842 } 1843 } 1844 } else { 1845 /* use contour shading to indicate magnitude of values */ 1846 /* first determine max of all nonzero values */ 1847 PetscDraw popup; 1848 PetscReal scale,maxv = 0.0; 1849 1850 for (i=0; i<a->nz*a->bs2; i++) { 1851 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1852 } 1853 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1854 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1855 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1856 for (i=0,row=0; i<mbs; i++,row+=bs) { 1857 for (j=a->i[i]; j<a->i[i+1]; j++) { 1858 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1859 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1860 aa = a->a + j*bs2; 1861 for (k=0; k<bs; k++) { 1862 for (l=0; l<bs; l++) { 1863 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1864 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1865 } 1866 } 1867 } 1868 } 1869 } 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1875 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1876 { 1877 PetscErrorCode ierr; 1878 PetscReal xl,yl,xr,yr,w,h; 1879 PetscDraw draw; 1880 PetscBool isnull; 1881 1882 PetscFunctionBegin; 1883 1884 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1885 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1886 1887 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1888 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1889 xr += w; yr += h; xl = -w; yl = -h; 1890 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1891 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1892 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatView_SeqBAIJ" 1898 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1899 { 1900 PetscErrorCode ierr; 1901 PetscBool iascii,isbinary,isdraw; 1902 1903 PetscFunctionBegin; 1904 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1905 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1906 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1907 if (iascii){ 1908 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1909 } else if (isbinary) { 1910 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1911 } else if (isdraw) { 1912 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1913 } else { 1914 Mat B; 1915 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1916 ierr = MatView(B,viewer);CHKERRQ(ierr); 1917 ierr = MatDestroy(&B);CHKERRQ(ierr); 1918 } 1919 PetscFunctionReturn(0); 1920 } 1921 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1925 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1926 { 1927 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1928 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1929 PetscInt *ai = a->i,*ailen = a->ilen; 1930 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1931 MatScalar *ap,*aa = a->a; 1932 1933 PetscFunctionBegin; 1934 for (k=0; k<m; k++) { /* loop over rows */ 1935 row = im[k]; brow = row/bs; 1936 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1937 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1938 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1939 nrow = ailen[brow]; 1940 for (l=0; l<n; l++) { /* loop over columns */ 1941 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1942 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1943 col = in[l] ; 1944 bcol = col/bs; 1945 cidx = col%bs; 1946 ridx = row%bs; 1947 high = nrow; 1948 low = 0; /* assume unsorted */ 1949 while (high-low > 5) { 1950 t = (low+high)/2; 1951 if (rp[t] > bcol) high = t; 1952 else low = t; 1953 } 1954 for (i=low; i<high; i++) { 1955 if (rp[i] > bcol) break; 1956 if (rp[i] == bcol) { 1957 *v++ = ap[bs2*i+bs*cidx+ridx]; 1958 goto finished; 1959 } 1960 } 1961 *v++ = 0.0; 1962 finished:; 1963 } 1964 } 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1970 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1971 { 1972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1973 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1974 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1975 PetscErrorCode ierr; 1976 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1977 PetscBool roworiented=a->roworiented; 1978 const PetscScalar *value = v; 1979 MatScalar *ap,*aa = a->a,*bap; 1980 1981 PetscFunctionBegin; 1982 if (roworiented) { 1983 stepval = (n-1)*bs; 1984 } else { 1985 stepval = (m-1)*bs; 1986 } 1987 for (k=0; k<m; k++) { /* loop over added rows */ 1988 row = im[k]; 1989 if (row < 0) continue; 1990 #if defined(PETSC_USE_DEBUG) 1991 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1992 #endif 1993 rp = aj + ai[row]; 1994 ap = aa + bs2*ai[row]; 1995 rmax = imax[row]; 1996 nrow = ailen[row]; 1997 low = 0; 1998 high = nrow; 1999 for (l=0; l<n; l++) { /* loop over added columns */ 2000 if (in[l] < 0) continue; 2001 #if defined(PETSC_USE_DEBUG) 2002 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 2003 #endif 2004 col = in[l]; 2005 if (roworiented) { 2006 value = v + (k*(stepval+bs) + l)*bs; 2007 } else { 2008 value = v + (l*(stepval+bs) + k)*bs; 2009 } 2010 if (col <= lastcol) low = 0; else high = nrow; 2011 lastcol = col; 2012 while (high-low > 7) { 2013 t = (low+high)/2; 2014 if (rp[t] > col) high = t; 2015 else low = t; 2016 } 2017 for (i=low; i<high; i++) { 2018 if (rp[i] > col) break; 2019 if (rp[i] == col) { 2020 bap = ap + bs2*i; 2021 if (roworiented) { 2022 if (is == ADD_VALUES) { 2023 for (ii=0; ii<bs; ii++,value+=stepval) { 2024 for (jj=ii; jj<bs2; jj+=bs) { 2025 bap[jj] += *value++; 2026 } 2027 } 2028 } else { 2029 for (ii=0; ii<bs; ii++,value+=stepval) { 2030 for (jj=ii; jj<bs2; jj+=bs) { 2031 bap[jj] = *value++; 2032 } 2033 } 2034 } 2035 } else { 2036 if (is == ADD_VALUES) { 2037 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2038 for (jj=0; jj<bs; jj++) { 2039 bap[jj] += value[jj]; 2040 } 2041 bap += bs; 2042 } 2043 } else { 2044 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2045 for (jj=0; jj<bs; jj++) { 2046 bap[jj] = value[jj]; 2047 } 2048 bap += bs; 2049 } 2050 } 2051 } 2052 goto noinsert2; 2053 } 2054 } 2055 if (nonew == 1) goto noinsert2; 2056 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2057 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2058 N = nrow++ - 1; high++; 2059 /* shift up all the later entries in this row */ 2060 for (ii=N; ii>=i; ii--) { 2061 rp[ii+1] = rp[ii]; 2062 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2063 } 2064 if (N >= i) { 2065 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2066 } 2067 rp[i] = col; 2068 bap = ap + bs2*i; 2069 if (roworiented) { 2070 for (ii=0; ii<bs; ii++,value+=stepval) { 2071 for (jj=ii; jj<bs2; jj+=bs) { 2072 bap[jj] = *value++; 2073 } 2074 } 2075 } else { 2076 for (ii=0; ii<bs; ii++,value+=stepval) { 2077 for (jj=0; jj<bs; jj++) { 2078 *bap++ = *value++; 2079 } 2080 } 2081 } 2082 noinsert2:; 2083 low = i; 2084 } 2085 ailen[row] = nrow; 2086 } 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2092 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2093 { 2094 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2095 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2096 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2097 PetscErrorCode ierr; 2098 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2099 MatScalar *aa = a->a,*ap; 2100 PetscReal ratio=0.6; 2101 2102 PetscFunctionBegin; 2103 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2104 2105 if (m) rmax = ailen[0]; 2106 for (i=1; i<mbs; i++) { 2107 /* move each row back by the amount of empty slots (fshift) before it*/ 2108 fshift += imax[i-1] - ailen[i-1]; 2109 rmax = PetscMax(rmax,ailen[i]); 2110 if (fshift) { 2111 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2112 N = ailen[i]; 2113 for (j=0; j<N; j++) { 2114 ip[j-fshift] = ip[j]; 2115 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2116 } 2117 } 2118 ai[i] = ai[i-1] + ailen[i-1]; 2119 } 2120 if (mbs) { 2121 fshift += imax[mbs-1] - ailen[mbs-1]; 2122 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2123 } 2124 /* reset ilen and imax for each row */ 2125 for (i=0; i<mbs; i++) { 2126 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2127 } 2128 a->nz = ai[mbs]; 2129 2130 /* diagonals may have moved, so kill the diagonal pointers */ 2131 a->idiagvalid = PETSC_FALSE; 2132 if (fshift && a->diag) { 2133 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2134 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2135 a->diag = 0; 2136 } 2137 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2138 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2139 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2140 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2141 A->info.mallocs += a->reallocs; 2142 a->reallocs = 0; 2143 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2144 2145 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2146 A->same_nonzero = PETSC_TRUE; 2147 PetscFunctionReturn(0); 2148 } 2149 2150 /* 2151 This function returns an array of flags which indicate the locations of contiguous 2152 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2153 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2154 Assume: sizes should be long enough to hold all the values. 2155 */ 2156 #undef __FUNCT__ 2157 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2158 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2159 { 2160 PetscInt i,j,k,row; 2161 PetscBool flg; 2162 2163 PetscFunctionBegin; 2164 for (i=0,j=0; i<n; j++) { 2165 row = idx[i]; 2166 if (row%bs!=0) { /* Not the begining of a block */ 2167 sizes[j] = 1; 2168 i++; 2169 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2170 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2171 i++; 2172 } else { /* Begining of the block, so check if the complete block exists */ 2173 flg = PETSC_TRUE; 2174 for (k=1; k<bs; k++) { 2175 if (row+k != idx[i+k]) { /* break in the block */ 2176 flg = PETSC_FALSE; 2177 break; 2178 } 2179 } 2180 if (flg) { /* No break in the bs */ 2181 sizes[j] = bs; 2182 i+= bs; 2183 } else { 2184 sizes[j] = 1; 2185 i++; 2186 } 2187 } 2188 } 2189 *bs_max = j; 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2195 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2196 { 2197 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2198 PetscErrorCode ierr; 2199 PetscInt i,j,k,count,*rows; 2200 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2201 PetscScalar zero = 0.0; 2202 MatScalar *aa; 2203 const PetscScalar *xx; 2204 PetscScalar *bb; 2205 2206 PetscFunctionBegin; 2207 /* fix right hand side if needed */ 2208 if (x && b) { 2209 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2210 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2211 for (i=0; i<is_n; i++) { 2212 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2213 } 2214 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2215 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2216 } 2217 2218 /* Make a copy of the IS and sort it */ 2219 /* allocate memory for rows,sizes */ 2220 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2221 2222 /* copy IS values to rows, and sort them */ 2223 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2224 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2225 2226 if (baij->keepnonzeropattern) { 2227 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2228 bs_max = is_n; 2229 A->same_nonzero = PETSC_TRUE; 2230 } else { 2231 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2232 A->same_nonzero = PETSC_FALSE; 2233 } 2234 2235 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2236 row = rows[j]; 2237 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2238 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2239 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2240 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2241 if (diag != (PetscScalar)0.0) { 2242 if (baij->ilen[row/bs] > 0) { 2243 baij->ilen[row/bs] = 1; 2244 baij->j[baij->i[row/bs]] = row/bs; 2245 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2246 } 2247 /* Now insert all the diagonal values for this bs */ 2248 for (k=0; k<bs; k++) { 2249 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2250 } 2251 } else { /* (diag == 0.0) */ 2252 baij->ilen[row/bs] = 0; 2253 } /* end (diag == 0.0) */ 2254 } else { /* (sizes[i] != bs) */ 2255 #if defined (PETSC_USE_DEBUG) 2256 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2257 #endif 2258 for (k=0; k<count; k++) { 2259 aa[0] = zero; 2260 aa += bs; 2261 } 2262 if (diag != (PetscScalar)0.0) { 2263 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2264 } 2265 } 2266 } 2267 2268 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2269 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2275 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2276 { 2277 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2278 PetscErrorCode ierr; 2279 PetscInt i,j,k,count; 2280 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2281 PetscScalar zero = 0.0; 2282 MatScalar *aa; 2283 const PetscScalar *xx; 2284 PetscScalar *bb; 2285 PetscBool *zeroed,vecs = PETSC_FALSE; 2286 2287 PetscFunctionBegin; 2288 /* fix right hand side if needed */ 2289 if (x && b) { 2290 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2291 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2292 vecs = PETSC_TRUE; 2293 } 2294 A->same_nonzero = PETSC_TRUE; 2295 2296 /* zero the columns */ 2297 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2298 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2299 for (i=0; i<is_n; i++) { 2300 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2301 zeroed[is_idx[i]] = PETSC_TRUE; 2302 } 2303 for (i=0; i<A->rmap->N; i++) { 2304 if (!zeroed[i]) { 2305 row = i/bs; 2306 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2307 for (k=0; k<bs; k++) { 2308 col = bs*baij->j[j] + k; 2309 if (zeroed[col]) { 2310 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2311 if (vecs) bb[i] -= aa[0]*xx[col]; 2312 aa[0] = 0.0; 2313 } 2314 } 2315 } 2316 } else if (vecs) bb[i] = diag*xx[i]; 2317 } 2318 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2319 if (vecs) { 2320 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2321 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2322 } 2323 2324 /* zero the rows */ 2325 for (i=0; i<is_n; i++) { 2326 row = is_idx[i]; 2327 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2328 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2329 for (k=0; k<count; k++) { 2330 aa[0] = zero; 2331 aa += bs; 2332 } 2333 if (diag != (PetscScalar)0.0) { 2334 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2335 } 2336 } 2337 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2343 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2344 { 2345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2346 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2347 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2348 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2349 PetscErrorCode ierr; 2350 PetscInt ridx,cidx,bs2=a->bs2; 2351 PetscBool roworiented=a->roworiented; 2352 MatScalar *ap,value,*aa=a->a,*bap; 2353 2354 PetscFunctionBegin; 2355 if (v) PetscValidScalarPointer(v,6); 2356 for (k=0; k<m; k++) { /* loop over added rows */ 2357 row = im[k]; 2358 brow = row/bs; 2359 if (row < 0) continue; 2360 #if defined(PETSC_USE_DEBUG) 2361 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2362 #endif 2363 rp = aj + ai[brow]; 2364 ap = aa + bs2*ai[brow]; 2365 rmax = imax[brow]; 2366 nrow = ailen[brow]; 2367 low = 0; 2368 high = nrow; 2369 for (l=0; l<n; l++) { /* loop over added columns */ 2370 if (in[l] < 0) continue; 2371 #if defined(PETSC_USE_DEBUG) 2372 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2373 #endif 2374 col = in[l]; bcol = col/bs; 2375 ridx = row % bs; cidx = col % bs; 2376 if (roworiented) { 2377 value = v[l + k*n]; 2378 } else { 2379 value = v[k + l*m]; 2380 } 2381 if (col <= lastcol) low = 0; else high = nrow; 2382 lastcol = col; 2383 while (high-low > 7) { 2384 t = (low+high)/2; 2385 if (rp[t] > bcol) high = t; 2386 else low = t; 2387 } 2388 for (i=low; i<high; i++) { 2389 if (rp[i] > bcol) break; 2390 if (rp[i] == bcol) { 2391 bap = ap + bs2*i + bs*cidx + ridx; 2392 if (is == ADD_VALUES) *bap += value; 2393 else *bap = value; 2394 goto noinsert1; 2395 } 2396 } 2397 if (nonew == 1) goto noinsert1; 2398 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2399 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2400 N = nrow++ - 1; high++; 2401 /* shift up all the later entries in this row */ 2402 for (ii=N; ii>=i; ii--) { 2403 rp[ii+1] = rp[ii]; 2404 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2405 } 2406 if (N>=i) { 2407 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2408 } 2409 rp[i] = bcol; 2410 ap[bs2*i + bs*cidx + ridx] = value; 2411 a->nz++; 2412 noinsert1:; 2413 low = i; 2414 } 2415 ailen[brow] = nrow; 2416 } 2417 A->same_nonzero = PETSC_FALSE; 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2423 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2424 { 2425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2426 Mat outA; 2427 PetscErrorCode ierr; 2428 PetscBool row_identity,col_identity; 2429 2430 PetscFunctionBegin; 2431 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2432 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2433 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2434 if (!row_identity || !col_identity) { 2435 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2436 } 2437 2438 outA = inA; 2439 inA->factortype = MAT_FACTOR_LU; 2440 2441 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2442 2443 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2444 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2445 a->row = row; 2446 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2447 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2448 a->col = col; 2449 2450 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2451 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2452 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2453 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2454 2455 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2456 if (!a->solve_work) { 2457 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2458 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2459 } 2460 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2461 2462 PetscFunctionReturn(0); 2463 } 2464 2465 EXTERN_C_BEGIN 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2468 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2469 { 2470 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2471 PetscInt i,nz,mbs; 2472 2473 PetscFunctionBegin; 2474 nz = baij->maxnz; 2475 mbs = baij->mbs; 2476 for (i=0; i<nz; i++) { 2477 baij->j[i] = indices[i]; 2478 } 2479 baij->nz = nz; 2480 for (i=0; i<mbs; i++) { 2481 baij->ilen[i] = baij->imax[i]; 2482 } 2483 PetscFunctionReturn(0); 2484 } 2485 EXTERN_C_END 2486 2487 #undef __FUNCT__ 2488 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2489 /*@ 2490 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2491 in the matrix. 2492 2493 Input Parameters: 2494 + mat - the SeqBAIJ matrix 2495 - indices - the column indices 2496 2497 Level: advanced 2498 2499 Notes: 2500 This can be called if you have precomputed the nonzero structure of the 2501 matrix and want to provide it to the matrix object to improve the performance 2502 of the MatSetValues() operation. 2503 2504 You MUST have set the correct numbers of nonzeros per row in the call to 2505 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2506 2507 MUST be called before any calls to MatSetValues(); 2508 2509 @*/ 2510 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2511 { 2512 PetscErrorCode ierr; 2513 2514 PetscFunctionBegin; 2515 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2516 PetscValidPointer(indices,2); 2517 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2518 PetscFunctionReturn(0); 2519 } 2520 2521 #undef __FUNCT__ 2522 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2523 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2524 { 2525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2526 PetscErrorCode ierr; 2527 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2528 PetscReal atmp; 2529 PetscScalar *x,zero = 0.0; 2530 MatScalar *aa; 2531 PetscInt ncols,brow,krow,kcol; 2532 2533 PetscFunctionBegin; 2534 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2535 bs = A->rmap->bs; 2536 aa = a->a; 2537 ai = a->i; 2538 aj = a->j; 2539 mbs = a->mbs; 2540 2541 ierr = VecSet(v,zero);CHKERRQ(ierr); 2542 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2543 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2544 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2545 for (i=0; i<mbs; i++) { 2546 ncols = ai[1] - ai[0]; ai++; 2547 brow = bs*i; 2548 for (j=0; j<ncols; j++){ 2549 for (kcol=0; kcol<bs; kcol++){ 2550 for (krow=0; krow<bs; krow++){ 2551 atmp = PetscAbsScalar(*aa);aa++; 2552 row = brow + krow; /* row index */ 2553 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2554 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2555 } 2556 } 2557 aj++; 2558 } 2559 } 2560 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatCopy_SeqBAIJ" 2566 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2567 { 2568 PetscErrorCode ierr; 2569 2570 PetscFunctionBegin; 2571 /* If the two matrices have the same copy implementation, use fast copy. */ 2572 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2573 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2574 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2575 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2576 2577 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2578 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2579 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2580 } else { 2581 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2582 } 2583 PetscFunctionReturn(0); 2584 } 2585 2586 #undef __FUNCT__ 2587 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2588 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2589 { 2590 PetscErrorCode ierr; 2591 2592 PetscFunctionBegin; 2593 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2599 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2600 { 2601 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2602 PetscFunctionBegin; 2603 *array = a->a; 2604 PetscFunctionReturn(0); 2605 } 2606 2607 #undef __FUNCT__ 2608 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2609 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2610 { 2611 PetscFunctionBegin; 2612 PetscFunctionReturn(0); 2613 } 2614 2615 #undef __FUNCT__ 2616 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2617 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2618 { 2619 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2620 PetscErrorCode ierr; 2621 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2622 PetscBLASInt one=1; 2623 2624 PetscFunctionBegin; 2625 if (str == SAME_NONZERO_PATTERN) { 2626 PetscScalar alpha = a; 2627 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2628 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2629 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2630 if (y->xtoy && y->XtoY != X) { 2631 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2632 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2633 } 2634 if (!y->xtoy) { /* get xtoy */ 2635 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2636 y->XtoY = X; 2637 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2638 } 2639 for (i=0; i<x->nz; i++) { 2640 j = 0; 2641 while (j < bs2){ 2642 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2643 j++; 2644 } 2645 } 2646 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2647 } else { 2648 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2649 } 2650 PetscFunctionReturn(0); 2651 } 2652 2653 #undef __FUNCT__ 2654 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2655 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2656 { 2657 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2658 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2659 MatScalar *aa = a->a; 2660 2661 PetscFunctionBegin; 2662 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2663 PetscFunctionReturn(0); 2664 } 2665 2666 #undef __FUNCT__ 2667 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2668 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2669 { 2670 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2671 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2672 MatScalar *aa = a->a; 2673 2674 PetscFunctionBegin; 2675 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2676 PetscFunctionReturn(0); 2677 } 2678 2679 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2680 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2683 /* 2684 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2685 */ 2686 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2687 { 2688 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2689 PetscErrorCode ierr; 2690 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2691 PetscInt nz = a->i[m],row,*jj,mr,col; 2692 2693 PetscFunctionBegin; 2694 *nn = n; 2695 if (!ia) PetscFunctionReturn(0); 2696 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2697 else { 2698 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2699 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2700 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2701 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2702 jj = a->j; 2703 for (i=0; i<nz; i++) { 2704 collengths[jj[i]]++; 2705 } 2706 cia[0] = oshift; 2707 for (i=0; i<n; i++) { 2708 cia[i+1] = cia[i] + collengths[i]; 2709 } 2710 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2711 jj = a->j; 2712 for (row=0; row<m; row++) { 2713 mr = a->i[row+1] - a->i[row]; 2714 for (i=0; i<mr; i++) { 2715 col = *jj++; 2716 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2717 } 2718 } 2719 ierr = PetscFree(collengths);CHKERRQ(ierr); 2720 *ia = cia; *ja = cja; 2721 } 2722 PetscFunctionReturn(0); 2723 } 2724 2725 #undef __FUNCT__ 2726 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2727 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2728 { 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 if (!ia) PetscFunctionReturn(0); 2733 ierr = PetscFree(*ia);CHKERRQ(ierr); 2734 ierr = PetscFree(*ja);CHKERRQ(ierr); 2735 PetscFunctionReturn(0); 2736 } 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2740 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2741 { 2742 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2743 PetscErrorCode ierr; 2744 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2745 PetscScalar dx,*y,*xx,*w3_array; 2746 PetscScalar *vscale_array; 2747 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2748 Vec w1=coloring->w1,w2=coloring->w2,w3; 2749 void *fctx = coloring->fctx; 2750 PetscBool flg = PETSC_FALSE; 2751 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2752 Vec x1_tmp; 2753 2754 PetscFunctionBegin; 2755 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2756 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2757 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2758 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2759 2760 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2761 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2762 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2763 if (flg) { 2764 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2765 } else { 2766 PetscBool assembled; 2767 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2768 if (assembled) { 2769 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2770 } 2771 } 2772 2773 x1_tmp = x1; 2774 if (!coloring->vscale){ 2775 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2776 } 2777 2778 /* 2779 This is a horrible, horrible, hack. 2780 */ 2781 if (coloring->F) { 2782 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2783 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2784 if (m1 != m2) { 2785 coloring->F = 0; 2786 } 2787 } 2788 2789 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2790 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2791 } 2792 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2793 2794 /* Set w1 = F(x1) */ 2795 if (coloring->F) { 2796 w1 = coloring->F; /* use already computed value of function */ 2797 coloring->F = 0; 2798 } else { 2799 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2800 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2801 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2802 } 2803 2804 if (!coloring->w3) { 2805 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2806 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2807 } 2808 w3 = coloring->w3; 2809 2810 CHKMEMQ; 2811 /* Compute all the local scale factors, including ghost points */ 2812 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2813 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2814 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2815 if (ctype == IS_COLORING_GHOSTED){ 2816 col_start = 0; col_end = N; 2817 } else if (ctype == IS_COLORING_GLOBAL){ 2818 xx = xx - start; 2819 vscale_array = vscale_array - start; 2820 col_start = start; col_end = N + start; 2821 } CHKMEMQ; 2822 for (col=col_start; col<col_end; col++){ 2823 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2824 if (coloring->htype[0] == 'w') { 2825 dx = 1.0 + unorm; 2826 } else { 2827 dx = xx[col]; 2828 } 2829 if (dx == (PetscScalar)0.0) dx = 1.0; 2830 #if !defined(PETSC_USE_COMPLEX) 2831 if (dx < umin && dx >= 0.0) dx = umin; 2832 else if (dx < 0.0 && dx > -umin) dx = -umin; 2833 #else 2834 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2835 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2836 #endif 2837 dx *= epsilon; 2838 vscale_array[col] = (PetscScalar)1.0/dx; 2839 } CHKMEMQ; 2840 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2841 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2842 if (ctype == IS_COLORING_GLOBAL){ 2843 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 } 2846 CHKMEMQ; 2847 if (coloring->vscaleforrow) { 2848 vscaleforrow = coloring->vscaleforrow; 2849 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2850 2851 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2852 /* 2853 Loop over each color 2854 */ 2855 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2856 for (k=0; k<coloring->ncolors; k++) { 2857 coloring->currentcolor = k; 2858 for (i=0; i<bs; i++) { 2859 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2860 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2861 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2862 /* 2863 Loop over each column associated with color 2864 adding the perturbation to the vector w3. 2865 */ 2866 for (l=0; l<coloring->ncolumns[k]; l++) { 2867 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2868 if (coloring->htype[0] == 'w') { 2869 dx = 1.0 + unorm; 2870 } else { 2871 dx = xx[col]; 2872 } 2873 if (dx == (PetscScalar)0.0) dx = 1.0; 2874 #if !defined(PETSC_USE_COMPLEX) 2875 if (dx < umin && dx >= 0.0) dx = umin; 2876 else if (dx < 0.0 && dx > -umin) dx = -umin; 2877 #else 2878 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2879 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2880 #endif 2881 dx *= epsilon; 2882 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2883 w3_array[col] += dx; 2884 } 2885 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2886 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2887 2888 /* 2889 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2890 w2 = F(x1 + dx) - F(x1) 2891 */ 2892 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2893 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2894 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2895 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2896 2897 /* 2898 Loop over rows of vector, putting results into Jacobian matrix 2899 */ 2900 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2901 for (l=0; l<coloring->nrows[k]; l++) { 2902 row = bs*coloring->rows[k][l]; /* local row index */ 2903 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2904 for (j=0; j<bs; j++) { 2905 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2906 srows[j] = row + start + j; 2907 } 2908 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2909 } 2910 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2911 } 2912 } /* endof for each color */ 2913 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2914 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2915 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2916 ierr = PetscFree(srows);CHKERRQ(ierr); 2917 2918 coloring->currentcolor = -1; 2919 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2920 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2921 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2922 PetscFunctionReturn(0); 2923 } 2924 2925 /* -------------------------------------------------------------------*/ 2926 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2927 MatGetRow_SeqBAIJ, 2928 MatRestoreRow_SeqBAIJ, 2929 MatMult_SeqBAIJ_N, 2930 /* 4*/ MatMultAdd_SeqBAIJ_N, 2931 MatMultTranspose_SeqBAIJ, 2932 MatMultTransposeAdd_SeqBAIJ, 2933 0, 2934 0, 2935 0, 2936 /*10*/ 0, 2937 MatLUFactor_SeqBAIJ, 2938 0, 2939 0, 2940 MatTranspose_SeqBAIJ, 2941 /*15*/ MatGetInfo_SeqBAIJ, 2942 MatEqual_SeqBAIJ, 2943 MatGetDiagonal_SeqBAIJ, 2944 MatDiagonalScale_SeqBAIJ, 2945 MatNorm_SeqBAIJ, 2946 /*20*/ 0, 2947 MatAssemblyEnd_SeqBAIJ, 2948 MatSetOption_SeqBAIJ, 2949 MatZeroEntries_SeqBAIJ, 2950 /*24*/ MatZeroRows_SeqBAIJ, 2951 0, 2952 0, 2953 0, 2954 0, 2955 /*29*/ MatSetUp_SeqBAIJ, 2956 0, 2957 0, 2958 MatGetArray_SeqBAIJ, 2959 MatRestoreArray_SeqBAIJ, 2960 /*34*/ MatDuplicate_SeqBAIJ, 2961 0, 2962 0, 2963 MatILUFactor_SeqBAIJ, 2964 0, 2965 /*39*/ MatAXPY_SeqBAIJ, 2966 MatGetSubMatrices_SeqBAIJ, 2967 MatIncreaseOverlap_SeqBAIJ, 2968 MatGetValues_SeqBAIJ, 2969 MatCopy_SeqBAIJ, 2970 /*44*/ 0, 2971 MatScale_SeqBAIJ, 2972 0, 2973 0, 2974 MatZeroRowsColumns_SeqBAIJ, 2975 /*49*/ 0, 2976 MatGetRowIJ_SeqBAIJ, 2977 MatRestoreRowIJ_SeqBAIJ, 2978 MatGetColumnIJ_SeqBAIJ, 2979 MatRestoreColumnIJ_SeqBAIJ, 2980 /*54*/ MatFDColoringCreate_SeqAIJ, 2981 0, 2982 0, 2983 0, 2984 MatSetValuesBlocked_SeqBAIJ, 2985 /*59*/ MatGetSubMatrix_SeqBAIJ, 2986 MatDestroy_SeqBAIJ, 2987 MatView_SeqBAIJ, 2988 0, 2989 0, 2990 /*64*/ 0, 2991 0, 2992 0, 2993 0, 2994 0, 2995 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2996 0, 2997 MatConvert_Basic, 2998 0, 2999 0, 3000 /*74*/ 0, 3001 MatFDColoringApply_BAIJ, 3002 0, 3003 0, 3004 0, 3005 /*79*/ 0, 3006 0, 3007 0, 3008 0, 3009 MatLoad_SeqBAIJ, 3010 /*84*/ 0, 3011 0, 3012 0, 3013 0, 3014 0, 3015 /*89*/ 0, 3016 0, 3017 0, 3018 0, 3019 0, 3020 /*94*/ 0, 3021 0, 3022 0, 3023 0, 3024 0, 3025 /*99*/0, 3026 0, 3027 0, 3028 0, 3029 0, 3030 /*104*/0, 3031 MatRealPart_SeqBAIJ, 3032 MatImaginaryPart_SeqBAIJ, 3033 0, 3034 0, 3035 /*109*/0, 3036 0, 3037 0, 3038 0, 3039 MatMissingDiagonal_SeqBAIJ, 3040 /*114*/0, 3041 0, 3042 0, 3043 0, 3044 0, 3045 /*119*/0, 3046 0, 3047 MatMultHermitianTranspose_SeqBAIJ, 3048 MatMultHermitianTransposeAdd_SeqBAIJ, 3049 0, 3050 /*124*/0, 3051 0, 3052 MatInvertBlockDiagonal_SeqBAIJ 3053 }; 3054 3055 EXTERN_C_BEGIN 3056 #undef __FUNCT__ 3057 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3058 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3059 { 3060 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3061 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3062 PetscErrorCode ierr; 3063 3064 PetscFunctionBegin; 3065 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3066 3067 /* allocate space for values if not already there */ 3068 if (!aij->saved_values) { 3069 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3070 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3071 } 3072 3073 /* copy values over */ 3074 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3075 PetscFunctionReturn(0); 3076 } 3077 EXTERN_C_END 3078 3079 EXTERN_C_BEGIN 3080 #undef __FUNCT__ 3081 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3082 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3083 { 3084 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3085 PetscErrorCode ierr; 3086 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3087 3088 PetscFunctionBegin; 3089 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3090 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3091 3092 /* copy values over */ 3093 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 EXTERN_C_END 3097 3098 EXTERN_C_BEGIN 3099 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3100 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3101 EXTERN_C_END 3102 3103 EXTERN_C_BEGIN 3104 #undef __FUNCT__ 3105 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3106 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3107 { 3108 Mat_SeqBAIJ *b; 3109 PetscErrorCode ierr; 3110 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3111 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3112 3113 PetscFunctionBegin; 3114 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3115 if (nz == MAT_SKIP_ALLOCATION) { 3116 skipallocation = PETSC_TRUE; 3117 nz = 0; 3118 } 3119 3120 if (bs < 0) { 3121 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3122 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3123 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3124 bs = PetscAbs(bs); 3125 } 3126 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3127 bs = newbs; 3128 3129 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3130 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3131 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3132 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3133 3134 B->preallocated = PETSC_TRUE; 3135 3136 mbs = B->rmap->n/bs; 3137 nbs = B->cmap->n/bs; 3138 bs2 = bs*bs; 3139 3140 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3141 3142 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3143 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3144 if (nnz) { 3145 for (i=0; i<mbs; i++) { 3146 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3147 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3148 } 3149 } 3150 3151 b = (Mat_SeqBAIJ*)B->data; 3152 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3153 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3154 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3155 3156 if (!flg) { 3157 switch (bs) { 3158 case 1: 3159 B->ops->mult = MatMult_SeqBAIJ_1; 3160 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3161 B->ops->sor = MatSOR_SeqBAIJ_1; 3162 break; 3163 case 2: 3164 B->ops->mult = MatMult_SeqBAIJ_2; 3165 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3166 B->ops->sor = MatSOR_SeqBAIJ_2; 3167 break; 3168 case 3: 3169 B->ops->mult = MatMult_SeqBAIJ_3; 3170 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3171 B->ops->sor = MatSOR_SeqBAIJ_3; 3172 break; 3173 case 4: 3174 B->ops->mult = MatMult_SeqBAIJ_4; 3175 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3176 B->ops->sor = MatSOR_SeqBAIJ_4; 3177 break; 3178 case 5: 3179 B->ops->mult = MatMult_SeqBAIJ_5; 3180 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3181 B->ops->sor = MatSOR_SeqBAIJ_5; 3182 break; 3183 case 6: 3184 B->ops->mult = MatMult_SeqBAIJ_6; 3185 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3186 B->ops->sor = MatSOR_SeqBAIJ_6; 3187 break; 3188 case 7: 3189 B->ops->mult = MatMult_SeqBAIJ_7; 3190 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3191 B->ops->sor = MatSOR_SeqBAIJ_7; 3192 break; 3193 case 15: 3194 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3195 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3196 B->ops->sor = MatSOR_SeqBAIJ_N; 3197 break; 3198 default: 3199 B->ops->mult = MatMult_SeqBAIJ_N; 3200 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3201 B->ops->sor = MatSOR_SeqBAIJ_N; 3202 break; 3203 } 3204 } 3205 B->rmap->bs = bs; 3206 b->mbs = mbs; 3207 b->nbs = nbs; 3208 if (!skipallocation) { 3209 if (!b->imax) { 3210 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3211 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3212 b->free_imax_ilen = PETSC_TRUE; 3213 } 3214 /* b->ilen will count nonzeros in each block row so far. */ 3215 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3216 if (!nnz) { 3217 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3218 else if (nz < 0) nz = 1; 3219 for (i=0; i<mbs; i++) b->imax[i] = nz; 3220 nz = nz*mbs; 3221 } else { 3222 nz = 0; 3223 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3224 } 3225 3226 /* allocate the matrix space */ 3227 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3228 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3229 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3230 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3231 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3232 b->singlemalloc = PETSC_TRUE; 3233 b->i[0] = 0; 3234 for (i=1; i<mbs+1; i++) { 3235 b->i[i] = b->i[i-1] + b->imax[i-1]; 3236 } 3237 b->free_a = PETSC_TRUE; 3238 b->free_ij = PETSC_TRUE; 3239 } else { 3240 b->free_a = PETSC_FALSE; 3241 b->free_ij = PETSC_FALSE; 3242 } 3243 3244 B->rmap->bs = bs; 3245 b->bs2 = bs2; 3246 b->mbs = mbs; 3247 b->nz = 0; 3248 b->maxnz = nz; 3249 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3250 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3251 PetscFunctionReturn(0); 3252 } 3253 EXTERN_C_END 3254 3255 EXTERN_C_BEGIN 3256 #undef __FUNCT__ 3257 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3258 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3259 { 3260 PetscInt i,m,nz,nz_max=0,*nnz; 3261 PetscScalar *values=0; 3262 PetscErrorCode ierr; 3263 3264 PetscFunctionBegin; 3265 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3266 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3267 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3268 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3269 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3270 m = B->rmap->n/bs; 3271 3272 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3273 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3274 for(i=0; i<m; i++) { 3275 nz = ii[i+1]- ii[i]; 3276 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3277 nz_max = PetscMax(nz_max, nz); 3278 nnz[i] = nz; 3279 } 3280 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3281 ierr = PetscFree(nnz);CHKERRQ(ierr); 3282 3283 values = (PetscScalar*)V; 3284 if (!values) { 3285 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3286 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3287 } 3288 for (i=0; i<m; i++) { 3289 PetscInt ncols = ii[i+1] - ii[i]; 3290 const PetscInt *icols = jj + ii[i]; 3291 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3292 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3293 } 3294 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3295 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3296 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3297 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 EXTERN_C_END 3301 3302 3303 EXTERN_C_BEGIN 3304 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3305 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3306 #if defined(PETSC_HAVE_MUMPS) 3307 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3308 #endif 3309 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3310 EXTERN_C_END 3311 3312 /*MC 3313 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3314 block sparse compressed row format. 3315 3316 Options Database Keys: 3317 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3318 3319 Level: beginner 3320 3321 .seealso: MatCreateSeqBAIJ() 3322 M*/ 3323 3324 EXTERN_C_BEGIN 3325 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3326 EXTERN_C_END 3327 3328 EXTERN_C_BEGIN 3329 #undef __FUNCT__ 3330 #define __FUNCT__ "MatCreate_SeqBAIJ" 3331 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3332 { 3333 PetscErrorCode ierr; 3334 PetscMPIInt size; 3335 Mat_SeqBAIJ *b; 3336 3337 PetscFunctionBegin; 3338 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3339 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3340 3341 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3342 B->data = (void*)b; 3343 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3344 b->row = 0; 3345 b->col = 0; 3346 b->icol = 0; 3347 b->reallocs = 0; 3348 b->saved_values = 0; 3349 3350 b->roworiented = PETSC_TRUE; 3351 b->nonew = 0; 3352 b->diag = 0; 3353 b->solve_work = 0; 3354 b->mult_work = 0; 3355 B->spptr = 0; 3356 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3357 b->keepnonzeropattern = PETSC_FALSE; 3358 b->xtoy = 0; 3359 b->XtoY = 0; 3360 B->same_nonzero = PETSC_FALSE; 3361 3362 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3363 "MatGetFactorAvailable_seqbaij_petsc", 3364 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3365 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3366 "MatGetFactor_seqbaij_petsc", 3367 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3368 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3369 "MatGetFactor_seqbaij_bstrm", 3370 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3371 #if defined(PETSC_HAVE_MUMPS) 3372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3373 #endif 3374 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3375 "MatInvertBlockDiagonal_SeqBAIJ", 3376 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3377 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3378 "MatStoreValues_SeqBAIJ", 3379 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3380 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3381 "MatRetrieveValues_SeqBAIJ", 3382 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3383 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3384 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3385 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3386 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3387 "MatConvert_SeqBAIJ_SeqAIJ", 3388 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3389 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3390 "MatConvert_SeqBAIJ_SeqSBAIJ", 3391 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3392 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3393 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3394 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3395 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3396 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3397 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3398 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3399 "MatConvert_SeqBAIJ_SeqBSTRM", 3400 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3402 "MatIsTranspose_SeqBAIJ", 3403 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3404 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3405 PetscFunctionReturn(0); 3406 } 3407 EXTERN_C_END 3408 3409 #undef __FUNCT__ 3410 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3411 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3412 { 3413 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3414 PetscErrorCode ierr; 3415 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3416 3417 PetscFunctionBegin; 3418 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3419 3420 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3421 c->imax = a->imax; 3422 c->ilen = a->ilen; 3423 c->free_imax_ilen = PETSC_FALSE; 3424 } else { 3425 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3426 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3427 for (i=0; i<mbs; i++) { 3428 c->imax[i] = a->imax[i]; 3429 c->ilen[i] = a->ilen[i]; 3430 } 3431 c->free_imax_ilen = PETSC_TRUE; 3432 } 3433 3434 /* allocate the matrix space */ 3435 if (mallocmatspace){ 3436 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3437 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3438 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3439 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3440 c->i = a->i; 3441 c->j = a->j; 3442 c->singlemalloc = PETSC_FALSE; 3443 c->free_a = PETSC_TRUE; 3444 c->free_ij = PETSC_FALSE; 3445 c->parent = A; 3446 C->preallocated = PETSC_TRUE; 3447 C->assembled = PETSC_TRUE; 3448 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3449 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3450 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3451 } else { 3452 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3453 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3454 c->singlemalloc = PETSC_TRUE; 3455 c->free_a = PETSC_TRUE; 3456 c->free_ij = PETSC_TRUE; 3457 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3458 if (mbs > 0) { 3459 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3460 if (cpvalues == MAT_COPY_VALUES) { 3461 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3462 } else { 3463 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3464 } 3465 } 3466 C->preallocated = PETSC_TRUE; 3467 C->assembled = PETSC_TRUE; 3468 } 3469 } 3470 3471 c->roworiented = a->roworiented; 3472 c->nonew = a->nonew; 3473 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3474 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3475 c->bs2 = a->bs2; 3476 c->mbs = a->mbs; 3477 c->nbs = a->nbs; 3478 3479 if (a->diag) { 3480 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3481 c->diag = a->diag; 3482 c->free_diag = PETSC_FALSE; 3483 } else { 3484 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3485 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3486 for (i=0; i<mbs; i++) { 3487 c->diag[i] = a->diag[i]; 3488 } 3489 c->free_diag = PETSC_TRUE; 3490 } 3491 } else c->diag = 0; 3492 c->nz = a->nz; 3493 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3494 c->solve_work = 0; 3495 c->mult_work = 0; 3496 3497 c->compressedrow.use = a->compressedrow.use; 3498 c->compressedrow.nrows = a->compressedrow.nrows; 3499 c->compressedrow.check = a->compressedrow.check; 3500 if (a->compressedrow.use){ 3501 i = a->compressedrow.nrows; 3502 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3503 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3504 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3505 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3506 } else { 3507 c->compressedrow.use = PETSC_FALSE; 3508 c->compressedrow.i = PETSC_NULL; 3509 c->compressedrow.rindex = PETSC_NULL; 3510 } 3511 C->same_nonzero = A->same_nonzero; 3512 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3513 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3514 PetscFunctionReturn(0); 3515 } 3516 3517 #undef __FUNCT__ 3518 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3519 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3520 { 3521 PetscErrorCode ierr; 3522 3523 PetscFunctionBegin; 3524 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3525 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3526 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3527 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3528 PetscFunctionReturn(0); 3529 } 3530 3531 #undef __FUNCT__ 3532 #define __FUNCT__ "MatLoad_SeqBAIJ" 3533 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3534 { 3535 Mat_SeqBAIJ *a; 3536 PetscErrorCode ierr; 3537 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3538 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3539 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3540 PetscInt *masked,nmask,tmp,bs2,ishift; 3541 PetscMPIInt size; 3542 int fd; 3543 PetscScalar *aa; 3544 MPI_Comm comm = ((PetscObject)viewer)->comm; 3545 3546 PetscFunctionBegin; 3547 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3548 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3549 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3550 bs2 = bs*bs; 3551 3552 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3553 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3554 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3555 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3556 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3557 M = header[1]; N = header[2]; nz = header[3]; 3558 3559 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3560 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3561 3562 /* 3563 This code adds extra rows to make sure the number of rows is 3564 divisible by the blocksize 3565 */ 3566 mbs = M/bs; 3567 extra_rows = bs - M + bs*(mbs); 3568 if (extra_rows == bs) extra_rows = 0; 3569 else mbs++; 3570 if (extra_rows) { 3571 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3572 } 3573 3574 /* Set global sizes if not already set */ 3575 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3576 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3577 } else { /* Check if the matrix global sizes are correct */ 3578 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3579 if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */ 3580 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3581 } 3582 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3583 } 3584 3585 /* read in row lengths */ 3586 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3587 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3588 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3589 3590 /* read in column indices */ 3591 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3592 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3593 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3594 3595 /* loop over row lengths determining block row lengths */ 3596 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3597 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3598 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3599 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3600 rowcount = 0; 3601 nzcount = 0; 3602 for (i=0; i<mbs; i++) { 3603 nmask = 0; 3604 for (j=0; j<bs; j++) { 3605 kmax = rowlengths[rowcount]; 3606 for (k=0; k<kmax; k++) { 3607 tmp = jj[nzcount++]/bs; 3608 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3609 } 3610 rowcount++; 3611 } 3612 browlengths[i] += nmask; 3613 /* zero out the mask elements we set */ 3614 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3615 } 3616 3617 /* Do preallocation */ 3618 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3619 a = (Mat_SeqBAIJ*)newmat->data; 3620 3621 /* set matrix "i" values */ 3622 a->i[0] = 0; 3623 for (i=1; i<= mbs; i++) { 3624 a->i[i] = a->i[i-1] + browlengths[i-1]; 3625 a->ilen[i-1] = browlengths[i-1]; 3626 } 3627 a->nz = 0; 3628 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3629 3630 /* read in nonzero values */ 3631 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3632 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3633 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3634 3635 /* set "a" and "j" values into matrix */ 3636 nzcount = 0; jcount = 0; 3637 for (i=0; i<mbs; i++) { 3638 nzcountb = nzcount; 3639 nmask = 0; 3640 for (j=0; j<bs; j++) { 3641 kmax = rowlengths[i*bs+j]; 3642 for (k=0; k<kmax; k++) { 3643 tmp = jj[nzcount++]/bs; 3644 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3645 } 3646 } 3647 /* sort the masked values */ 3648 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3649 3650 /* set "j" values into matrix */ 3651 maskcount = 1; 3652 for (j=0; j<nmask; j++) { 3653 a->j[jcount++] = masked[j]; 3654 mask[masked[j]] = maskcount++; 3655 } 3656 /* set "a" values into matrix */ 3657 ishift = bs2*a->i[i]; 3658 for (j=0; j<bs; j++) { 3659 kmax = rowlengths[i*bs+j]; 3660 for (k=0; k<kmax; k++) { 3661 tmp = jj[nzcountb]/bs ; 3662 block = mask[tmp] - 1; 3663 point = jj[nzcountb] - bs*tmp; 3664 idx = ishift + bs2*block + j + bs*point; 3665 a->a[idx] = (MatScalar)aa[nzcountb++]; 3666 } 3667 } 3668 /* zero out the mask elements we set */ 3669 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3670 } 3671 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3672 3673 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3674 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3675 ierr = PetscFree(aa);CHKERRQ(ierr); 3676 ierr = PetscFree(jj);CHKERRQ(ierr); 3677 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3678 3679 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3680 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3681 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3682 PetscFunctionReturn(0); 3683 } 3684 3685 #undef __FUNCT__ 3686 #define __FUNCT__ "MatCreateSeqBAIJ" 3687 /*@C 3688 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3689 compressed row) format. For good matrix assembly performance the 3690 user should preallocate the matrix storage by setting the parameter nz 3691 (or the array nnz). By setting these parameters accurately, performance 3692 during matrix assembly can be increased by more than a factor of 50. 3693 3694 Collective on MPI_Comm 3695 3696 Input Parameters: 3697 + comm - MPI communicator, set to PETSC_COMM_SELF 3698 . bs - size of block 3699 . m - number of rows 3700 . n - number of columns 3701 . nz - number of nonzero blocks per block row (same for all rows) 3702 - nnz - array containing the number of nonzero blocks in the various block rows 3703 (possibly different for each block row) or PETSC_NULL 3704 3705 Output Parameter: 3706 . A - the matrix 3707 3708 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3709 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3710 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3711 3712 Options Database Keys: 3713 . -mat_no_unroll - uses code that does not unroll the loops in the 3714 block calculations (much slower) 3715 . -mat_block_size - size of the blocks to use 3716 3717 Level: intermediate 3718 3719 Notes: 3720 The number of rows and columns must be divisible by blocksize. 3721 3722 If the nnz parameter is given then the nz parameter is ignored 3723 3724 A nonzero block is any block that as 1 or more nonzeros in it 3725 3726 The block AIJ format is fully compatible with standard Fortran 77 3727 storage. That is, the stored row and column indices can begin at 3728 either one (as in Fortran) or zero. See the users' manual for details. 3729 3730 Specify the preallocated storage with either nz or nnz (not both). 3731 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3732 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3733 matrices. 3734 3735 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3736 @*/ 3737 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3738 { 3739 PetscErrorCode ierr; 3740 3741 PetscFunctionBegin; 3742 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3743 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3744 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3745 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3746 PetscFunctionReturn(0); 3747 } 3748 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3751 /*@C 3752 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3753 per row in the matrix. For good matrix assembly performance the 3754 user should preallocate the matrix storage by setting the parameter nz 3755 (or the array nnz). By setting these parameters accurately, performance 3756 during matrix assembly can be increased by more than a factor of 50. 3757 3758 Collective on MPI_Comm 3759 3760 Input Parameters: 3761 + A - the matrix 3762 . bs - size of block 3763 . nz - number of block nonzeros per block row (same for all rows) 3764 - nnz - array containing the number of block nonzeros in the various block rows 3765 (possibly different for each block row) or PETSC_NULL 3766 3767 Options Database Keys: 3768 . -mat_no_unroll - uses code that does not unroll the loops in the 3769 block calculations (much slower) 3770 . -mat_block_size - size of the blocks to use 3771 3772 Level: intermediate 3773 3774 Notes: 3775 If the nnz parameter is given then the nz parameter is ignored 3776 3777 You can call MatGetInfo() to get information on how effective the preallocation was; 3778 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3779 You can also run with the option -info and look for messages with the string 3780 malloc in them to see if additional memory allocation was needed. 3781 3782 The block AIJ format is fully compatible with standard Fortran 77 3783 storage. That is, the stored row and column indices can begin at 3784 either one (as in Fortran) or zero. See the users' manual for details. 3785 3786 Specify the preallocated storage with either nz or nnz (not both). 3787 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3788 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3789 3790 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3791 @*/ 3792 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3793 { 3794 PetscErrorCode ierr; 3795 3796 PetscFunctionBegin; 3797 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3798 PetscValidType(B,1); 3799 PetscValidLogicalCollectiveInt(B,bs,2); 3800 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3801 PetscFunctionReturn(0); 3802 } 3803 3804 #undef __FUNCT__ 3805 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3806 /*@C 3807 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3808 (the default sequential PETSc format). 3809 3810 Collective on MPI_Comm 3811 3812 Input Parameters: 3813 + A - the matrix 3814 . i - the indices into j for the start of each local row (starts with zero) 3815 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3816 - v - optional values in the matrix 3817 3818 Level: developer 3819 3820 .keywords: matrix, aij, compressed row, sparse 3821 3822 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3823 @*/ 3824 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3825 { 3826 PetscErrorCode ierr; 3827 3828 PetscFunctionBegin; 3829 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3830 PetscValidType(B,1); 3831 PetscValidLogicalCollectiveInt(B,bs,2); 3832 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3833 PetscFunctionReturn(0); 3834 } 3835 3836 3837 #undef __FUNCT__ 3838 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3839 /*@ 3840 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3841 3842 Collective on MPI_Comm 3843 3844 Input Parameters: 3845 + comm - must be an MPI communicator of size 1 3846 . bs - size of block 3847 . m - number of rows 3848 . n - number of columns 3849 . i - row indices 3850 . j - column indices 3851 - a - matrix values 3852 3853 Output Parameter: 3854 . mat - the matrix 3855 3856 Level: advanced 3857 3858 Notes: 3859 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3860 once the matrix is destroyed 3861 3862 You cannot set new nonzero locations into this matrix, that will generate an error. 3863 3864 The i and j indices are 0 based 3865 3866 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3867 3868 3869 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3870 3871 @*/ 3872 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3873 { 3874 PetscErrorCode ierr; 3875 PetscInt ii; 3876 Mat_SeqBAIJ *baij; 3877 3878 PetscFunctionBegin; 3879 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3880 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3881 3882 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3883 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3884 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3885 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3886 baij = (Mat_SeqBAIJ*)(*mat)->data; 3887 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3888 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3889 3890 baij->i = i; 3891 baij->j = j; 3892 baij->a = a; 3893 baij->singlemalloc = PETSC_FALSE; 3894 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3895 baij->free_a = PETSC_FALSE; 3896 baij->free_ij = PETSC_FALSE; 3897 3898 for (ii=0; ii<m; ii++) { 3899 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3900 #if defined(PETSC_USE_DEBUG) 3901 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3902 #endif 3903 } 3904 #if defined(PETSC_USE_DEBUG) 3905 for (ii=0; ii<baij->i[m]; ii++) { 3906 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3907 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3908 } 3909 #endif 3910 3911 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3912 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3913 PetscFunctionReturn(0); 3914 } 3915