1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "petscblaslapack.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@ 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 29 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 32 PetscFunctionReturn(0); 33 } 34 35 EXTERN_C_BEGIN 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 38 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 39 { 40 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 41 PetscErrorCode ierr; 42 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 43 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 44 PetscReal shift = 0.0; 45 46 PetscFunctionBegin; 47 if (a->idiagvalid) PetscFunctionReturn(0); 48 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 49 diag_offset = a->diag; 50 if (!a->idiag) { 51 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 52 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 53 } 54 diag = a->idiag; 55 mdiag = a->idiag+bs2*mbs; 56 /* factor and invert each block */ 57 switch (bs){ 58 case 1: 59 for (i=0; i<mbs; i++) { 60 odiag = v + 1*diag_offset[i]; 61 diag[0] = odiag[0]; 62 mdiag[0] = odiag[0]; 63 diag[0] = 1.0 / (diag[0] + shift); 64 diag += 1; 65 mdiag += 1; 66 } 67 break; 68 case 2: 69 for (i=0; i<mbs; i++) { 70 odiag = v + 4*diag_offset[i]; 71 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 72 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 73 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 74 diag += 4; 75 mdiag += 4; 76 } 77 break; 78 case 3: 79 for (i=0; i<mbs; i++) { 80 odiag = v + 9*diag_offset[i]; 81 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 82 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 83 diag[8] = odiag[8]; 84 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 85 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 86 mdiag[8] = odiag[8]; 87 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 88 diag += 9; 89 mdiag += 9; 90 } 91 break; 92 case 4: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 16*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 97 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 98 diag += 16; 99 mdiag += 16; 100 } 101 break; 102 case 5: 103 for (i=0; i<mbs; i++) { 104 odiag = v + 25*diag_offset[i]; 105 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 108 diag += 25; 109 mdiag += 25; 110 } 111 break; 112 case 6: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 36*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 118 diag += 36; 119 mdiag += 36; 120 } 121 break; 122 case 7: 123 for (i=0; i<mbs; i++) { 124 odiag = v + 49*diag_offset[i]; 125 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 127 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 128 diag += 49; 129 mdiag += 49; 130 } 131 break; 132 default: 133 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 134 for (i=0; i<mbs; i++) { 135 odiag = v + bs2*diag_offset[i]; 136 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 138 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 139 diag += bs2; 140 mdiag += bs2; 141 } 142 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 143 } 144 a->idiagvalid = PETSC_TRUE; 145 PetscFunctionReturn(0); 146 } 147 EXTERN_C_END 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 151 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 152 { 153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 154 PetscScalar *x,x1,s1; 155 const PetscScalar *b; 156 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 157 PetscErrorCode ierr; 158 PetscInt m = a->mbs,i,i2,nz,j; 159 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 160 161 PetscFunctionBegin; 162 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 163 its = its*lits; 164 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 169 170 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 171 172 diag = a->diag; 173 idiag = a->idiag; 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 179 x[0] = b[0]*idiag[0]; 180 i2 = 1; 181 idiag += 1; 182 for (i=1; i<m; i++) { 183 v = aa + ai[i]; 184 vi = aj + ai[i]; 185 nz = diag[i] - ai[i]; 186 s1 = b[i2]; 187 for (j=0; j<nz; j++) { 188 s1 -= v[j]*x[vi[j]]; 189 } 190 x[i2] = idiag[0]*s1; 191 idiag += 1; 192 i2 += 1; 193 } 194 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 195 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 196 } 197 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 198 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 199 i2 = 0; 200 mdiag = a->idiag+a->mbs; 201 for (i=0; i<m; i++) { 202 x1 = x[i2]; 203 x[i2] = mdiag[0]*x1; 204 mdiag += 1; 205 i2 += 1; 206 } 207 ierr = PetscLogFlops(m);CHKERRQ(ierr); 208 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 209 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 210 } 211 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 212 idiag = a->idiag+a->mbs - 1; 213 i2 = m - 1; 214 x1 = x[i2]; 215 x[i2] = idiag[0]*x1; 216 idiag -= 1; 217 i2 -= 1; 218 for (i=m-2; i>=0; i--) { 219 v = aa + (diag[i]+1); 220 vi = aj + diag[i] + 1; 221 nz = ai[i+1] - diag[i] - 1; 222 s1 = x[i2]; 223 for (j=0; j<nz; j++) { 224 s1 -= v[j]*x[vi[j]]; 225 } 226 x[i2] = idiag[0]*s1; 227 idiag -= 1; 228 i2 -= 1; 229 } 230 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 231 } 232 } else { 233 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 234 } 235 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 242 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 243 { 244 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 245 PetscScalar *x,x1,x2,s1,s2; 246 const PetscScalar *b; 247 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 248 PetscErrorCode ierr; 249 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 250 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 251 252 PetscFunctionBegin; 253 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 254 its = its*lits; 255 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 256 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 257 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 258 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 259 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 260 261 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 262 263 diag = a->diag; 264 idiag = a->idiag; 265 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 267 268 if (flag & SOR_ZERO_INITIAL_GUESS) { 269 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 270 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 271 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 272 i2 = 2; 273 idiag += 4; 274 for (i=1; i<m; i++) { 275 v = aa + 4*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s1 = b[i2]; s2 = b[i2+1]; 279 for (j=0; j<nz; j++) { 280 idx = 2*vi[j]; 281 it = 4*j; 282 x1 = x[idx]; x2 = x[1+idx]; 283 s1 -= v[it]*x1 + v[it+2]*x2; 284 s2 -= v[it+1]*x1 + v[it+3]*x2; 285 } 286 x[i2] = idiag[0]*s1 + idiag[2]*s2; 287 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 288 idiag += 4; 289 i2 += 2; 290 } 291 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 292 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 293 } 294 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 295 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 296 i2 = 0; 297 mdiag = a->idiag+4*a->mbs; 298 for (i=0; i<m; i++) { 299 x1 = x[i2]; x2 = x[i2+1]; 300 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 301 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 302 mdiag += 4; 303 i2 += 2; 304 } 305 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 306 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 307 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 308 } 309 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 310 idiag = a->idiag+4*a->mbs - 4; 311 i2 = 2*m - 2; 312 x1 = x[i2]; x2 = x[i2+1]; 313 x[i2] = idiag[0]*x1 + idiag[2]*x2; 314 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 315 idiag -= 4; 316 i2 -= 2; 317 for (i=m-2; i>=0; i--) { 318 v = aa + 4*(diag[i]+1); 319 vi = aj + diag[i] + 1; 320 nz = ai[i+1] - diag[i] - 1; 321 s1 = x[i2]; s2 = x[i2+1]; 322 for (j=0; j<nz; j++) { 323 idx = 2*vi[j]; 324 it = 4*j; 325 x1 = x[idx]; x2 = x[1+idx]; 326 s1 -= v[it]*x1 + v[it+2]*x2; 327 s2 -= v[it+1]*x1 + v[it+3]*x2; 328 } 329 x[i2] = idiag[0]*s1 + idiag[2]*s2; 330 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 331 idiag -= 4; 332 i2 -= 2; 333 } 334 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 335 } 336 } else { 337 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 338 } 339 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 346 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 347 { 348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 349 PetscScalar *x,x1,x2,x3,s1,s2,s3; 350 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 351 const PetscScalar *b; 352 PetscErrorCode ierr; 353 PetscInt m = a->mbs,i,i2,nz,idx; 354 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 355 356 PetscFunctionBegin; 357 its = its*lits; 358 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 359 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 360 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 361 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 362 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 363 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 364 365 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 366 367 diag = a->diag; 368 idiag = a->idiag; 369 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 370 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 371 372 if (flag & SOR_ZERO_INITIAL_GUESS) { 373 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 374 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 375 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 376 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 377 i2 = 3; 378 idiag += 9; 379 for (i=1; i<m; i++) { 380 v = aa + 9*ai[i]; 381 vi = aj + ai[i]; 382 nz = diag[i] - ai[i]; 383 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 384 while (nz--) { 385 idx = 3*(*vi++); 386 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 387 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 388 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 389 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 390 v += 9; 391 } 392 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 393 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 394 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 395 idiag += 9; 396 i2 += 3; 397 } 398 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 399 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 400 } 401 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 402 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 403 i2 = 0; 404 mdiag = a->idiag+9*a->mbs; 405 for (i=0; i<m; i++) { 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 407 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 408 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 409 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 410 mdiag += 9; 411 i2 += 3; 412 } 413 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 414 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 415 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 416 } 417 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 418 idiag = a->idiag+9*a->mbs - 9; 419 i2 = 3*m - 3; 420 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 421 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 422 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 423 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 424 idiag -= 9; 425 i2 -= 3; 426 for (i=m-2; i>=0; i--) { 427 v = aa + 9*(diag[i]+1); 428 vi = aj + diag[i] + 1; 429 nz = ai[i+1] - diag[i] - 1; 430 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 431 while (nz--) { 432 idx = 3*(*vi++); 433 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 434 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 435 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 436 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 437 v += 9; 438 } 439 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 440 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 441 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 446 } 447 } else { 448 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 449 } 450 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 451 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 457 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 458 { 459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 460 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 461 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 462 const PetscScalar *b; 463 PetscErrorCode ierr; 464 PetscInt m = a->mbs,i,i2,nz,idx; 465 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 466 467 PetscFunctionBegin; 468 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 469 its = its*lits; 470 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 471 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 472 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 473 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 474 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 475 476 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 477 478 diag = a->diag; 479 idiag = a->idiag; 480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 481 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 482 483 if (flag & SOR_ZERO_INITIAL_GUESS) { 484 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 485 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 486 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 487 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 488 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 489 i2 = 4; 490 idiag += 16; 491 for (i=1; i<m; i++) { 492 v = aa + 16*ai[i]; 493 vi = aj + ai[i]; 494 nz = diag[i] - ai[i]; 495 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 496 while (nz--) { 497 idx = 4*(*vi++); 498 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 499 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 500 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 501 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 502 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 503 v += 16; 504 } 505 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 506 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 507 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 508 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 509 idiag += 16; 510 i2 += 4; 511 } 512 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 513 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 514 } 515 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 516 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 517 i2 = 0; 518 mdiag = a->idiag+16*a->mbs; 519 for (i=0; i<m; i++) { 520 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 521 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 522 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 523 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 524 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 525 mdiag += 16; 526 i2 += 4; 527 } 528 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 529 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 530 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 531 } 532 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 533 idiag = a->idiag+16*a->mbs - 16; 534 i2 = 4*m - 4; 535 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 536 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 537 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 538 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 539 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 540 idiag -= 16; 541 i2 -= 4; 542 for (i=m-2; i>=0; i--) { 543 v = aa + 16*(diag[i]+1); 544 vi = aj + diag[i] + 1; 545 nz = ai[i+1] - diag[i] - 1; 546 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 547 while (nz--) { 548 idx = 4*(*vi++); 549 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 550 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 551 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 552 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 553 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 554 v += 16; 555 } 556 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 557 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 558 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 559 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 560 idiag -= 16; 561 i2 -= 4; 562 } 563 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 564 } 565 } else { 566 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 567 } 568 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 569 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 575 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 576 { 577 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 578 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 579 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 580 const PetscScalar *b; 581 PetscErrorCode ierr; 582 PetscInt m = a->mbs,i,i2,nz,idx; 583 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 584 585 PetscFunctionBegin; 586 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 587 its = its*lits; 588 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 589 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 590 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 591 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 592 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 593 594 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 595 596 diag = a->diag; 597 idiag = a->idiag; 598 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 599 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 600 601 if (flag & SOR_ZERO_INITIAL_GUESS) { 602 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 603 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 604 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 605 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 606 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 607 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 608 i2 = 5; 609 idiag += 25; 610 for (i=1; i<m; i++) { 611 v = aa + 25*ai[i]; 612 vi = aj + ai[i]; 613 nz = diag[i] - ai[i]; 614 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 615 while (nz--) { 616 idx = 5*(*vi++); 617 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 618 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 619 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 620 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 621 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 622 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 623 v += 25; 624 } 625 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 626 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 627 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 628 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 629 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 630 idiag += 25; 631 i2 += 5; 632 } 633 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 634 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 635 } 636 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 637 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 638 i2 = 0; 639 mdiag = a->idiag+25*a->mbs; 640 for (i=0; i<m; i++) { 641 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 642 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 643 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 644 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 645 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 646 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 647 mdiag += 25; 648 i2 += 5; 649 } 650 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 651 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 652 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 653 } 654 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 655 idiag = a->idiag+25*a->mbs - 25; 656 i2 = 5*m - 5; 657 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 658 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 659 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 660 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 661 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 662 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 663 idiag -= 25; 664 i2 -= 5; 665 for (i=m-2; i>=0; i--) { 666 v = aa + 25*(diag[i]+1); 667 vi = aj + diag[i] + 1; 668 nz = ai[i+1] - diag[i] - 1; 669 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 670 while (nz--) { 671 idx = 5*(*vi++); 672 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 673 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 674 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 675 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 676 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 677 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 678 v += 25; 679 } 680 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 681 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 682 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 683 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 684 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 685 idiag -= 25; 686 i2 -= 5; 687 } 688 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 689 } 690 } else { 691 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 692 } 693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 694 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 700 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 701 { 702 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 703 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 704 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 705 const PetscScalar *b; 706 PetscErrorCode ierr; 707 PetscInt m = a->mbs,i,i2,nz,idx; 708 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 709 710 PetscFunctionBegin; 711 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 712 its = its*lits; 713 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 714 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 715 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 716 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 717 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 718 719 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 720 721 diag = a->diag; 722 idiag = a->idiag; 723 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 724 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 725 726 if (flag & SOR_ZERO_INITIAL_GUESS) { 727 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 728 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 729 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 730 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 731 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 732 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 733 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 734 i2 = 6; 735 idiag += 36; 736 for (i=1; i<m; i++) { 737 v = aa + 36*ai[i]; 738 vi = aj + ai[i]; 739 nz = diag[i] - ai[i]; 740 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 741 while (nz--) { 742 idx = 6*(*vi++); 743 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 744 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 745 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 746 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 747 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 748 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 749 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 750 v += 36; 751 } 752 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 753 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 754 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 755 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 756 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 757 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 758 idiag += 36; 759 i2 += 6; 760 } 761 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 762 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 763 } 764 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 765 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 766 i2 = 0; 767 mdiag = a->idiag+36*a->mbs; 768 for (i=0; i<m; i++) { 769 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 770 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 771 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 772 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 773 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 774 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 775 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 776 mdiag += 36; 777 i2 += 6; 778 } 779 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 780 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 781 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 782 } 783 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 784 idiag = a->idiag+36*a->mbs - 36; 785 i2 = 6*m - 6; 786 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 787 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 788 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 789 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 790 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 791 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 792 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 793 idiag -= 36; 794 i2 -= 6; 795 for (i=m-2; i>=0; i--) { 796 v = aa + 36*(diag[i]+1); 797 vi = aj + diag[i] + 1; 798 nz = ai[i+1] - diag[i] - 1; 799 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 800 while (nz--) { 801 idx = 6*(*vi++); 802 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 803 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 804 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 805 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 806 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 807 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 808 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 809 v += 36; 810 } 811 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 812 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 813 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 814 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 815 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 816 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 817 idiag -= 36; 818 i2 -= 6; 819 } 820 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 821 } 822 } else { 823 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 824 } 825 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 826 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 832 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 833 { 834 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 835 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 836 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 837 const PetscScalar *b; 838 PetscErrorCode ierr; 839 PetscInt m = a->mbs,i,i2,nz,idx; 840 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 841 842 PetscFunctionBegin; 843 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 844 its = its*lits; 845 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 846 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 847 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 848 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 849 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 850 851 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 852 853 diag = a->diag; 854 idiag = a->idiag; 855 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 856 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 857 858 if (flag & SOR_ZERO_INITIAL_GUESS) { 859 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 860 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 861 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 862 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 863 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 864 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 865 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 866 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 867 i2 = 7; 868 idiag += 49; 869 for (i=1; i<m; i++) { 870 v = aa + 49*ai[i]; 871 vi = aj + ai[i]; 872 nz = diag[i] - ai[i]; 873 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 874 while (nz--) { 875 idx = 7*(*vi++); 876 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 877 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 878 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 879 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 880 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 881 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 882 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 883 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 884 v += 49; 885 } 886 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 887 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 888 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 889 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 890 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 891 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 892 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 893 idiag += 49; 894 i2 += 7; 895 } 896 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 897 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 898 } 899 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 900 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 901 i2 = 0; 902 mdiag = a->idiag+49*a->mbs; 903 for (i=0; i<m; i++) { 904 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 905 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 906 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 907 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 908 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 909 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 910 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 911 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 912 mdiag += 49; 913 i2 += 7; 914 } 915 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 916 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 917 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 918 } 919 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 920 idiag = a->idiag+49*a->mbs - 49; 921 i2 = 7*m - 7; 922 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 923 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 924 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 925 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 926 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 927 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 928 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 929 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 930 idiag -= 49; 931 i2 -= 7; 932 for (i=m-2; i>=0; i--) { 933 v = aa + 49*(diag[i]+1); 934 vi = aj + diag[i] + 1; 935 nz = ai[i+1] - diag[i] - 1; 936 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 937 while (nz--) { 938 idx = 7*(*vi++); 939 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 940 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 941 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 942 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 943 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 944 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 945 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 946 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 947 v += 49; 948 } 949 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 950 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 951 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 952 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 953 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 954 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 955 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 956 idiag -= 49; 957 i2 -= 7; 958 } 959 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 960 } 961 } else { 962 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 963 } 964 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 971 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 972 { 973 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 974 PetscScalar *x,*work,*w,*workt; 975 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 976 const PetscScalar *b; 977 PetscErrorCode ierr; 978 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 979 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 980 981 PetscFunctionBegin; 982 its = its*lits; 983 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 984 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 985 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 986 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 987 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 988 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 989 990 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 991 992 diag = a->diag; 993 idiag = a->idiag; 994 if (!a->mult_work) { 995 k = PetscMax(A->rmap->n,A->cmap->n); 996 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 997 } 998 work = a->mult_work; 999 if (!a->sor_work) { 1000 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1001 } 1002 w = a->sor_work; 1003 1004 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1005 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1006 1007 if (flag & SOR_ZERO_INITIAL_GUESS) { 1008 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1009 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1010 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1011 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1012 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1013 i2 = bs; 1014 idiag += bs2; 1015 for (i=1; i<m; i++) { 1016 v = aa + bs2*ai[i]; 1017 vi = aj + ai[i]; 1018 nz = diag[i] - ai[i]; 1019 1020 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1021 /* copy all rows of x that are needed into contiguous space */ 1022 workt = work; 1023 for (j=0; j<nz; j++) { 1024 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1025 workt += bs; 1026 } 1027 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1028 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1029 while (nz--) { 1030 idx = N*(*vi++); 1031 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1032 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1033 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1034 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1035 v += N2; 1036 } */ 1037 1038 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1039 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1040 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1041 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1042 1043 idiag += bs2; 1044 i2 += bs; 1045 } 1046 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1047 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1048 } 1049 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1050 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1051 i2 = 0; 1052 mdiag = a->idiag+bs2*a->mbs; 1053 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1054 for (i=0; i<m; i++) { 1055 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1056 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1057 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1058 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1059 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1060 1061 mdiag += bs2; 1062 i2 += bs; 1063 } 1064 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1065 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1066 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1067 } 1068 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1069 idiag = a->idiag+bs2*a->mbs - bs2; 1070 i2 = bs*m - bs; 1071 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1072 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1073 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1074 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1075 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1076 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1077 idiag -= bs2; 1078 i2 -= bs; 1079 for (i=m-2; i>=0; i--) { 1080 v = aa + bs2*(diag[i]+1); 1081 vi = aj + diag[i] + 1; 1082 nz = ai[i+1] - diag[i] - 1; 1083 1084 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1085 /* copy all rows of x that are needed into contiguous space */ 1086 workt = work; 1087 for (j=0; j<nz; j++) { 1088 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1089 workt += bs; 1090 } 1091 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1092 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1093 while (nz--) { 1094 idx = N*(*vi++); 1095 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1096 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1097 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1098 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1099 v += N2; 1100 } */ 1101 1102 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1103 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1104 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1105 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1106 idiag -= bs2; 1107 i2 -= bs; 1108 } 1109 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1110 } 1111 } else { 1112 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1113 } 1114 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1115 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 /* 1120 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1121 */ 1122 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1123 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1124 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1125 #define matsetvaluesblocked4_ matsetvaluesblocked4 1126 #endif 1127 1128 EXTERN_C_BEGIN 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "matsetvaluesblocked4_" 1131 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1132 { 1133 Mat A = *AA; 1134 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1135 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1136 PetscInt *ai=a->i,*ailen=a->ilen; 1137 PetscInt *aj=a->j,stepval,lastcol = -1; 1138 const PetscScalar *value = v; 1139 MatScalar *ap,*aa = a->a,*bap; 1140 1141 PetscFunctionBegin; 1142 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1143 stepval = (n-1)*4; 1144 for (k=0; k<m; k++) { /* loop over added rows */ 1145 row = im[k]; 1146 rp = aj + ai[row]; 1147 ap = aa + 16*ai[row]; 1148 nrow = ailen[row]; 1149 low = 0; 1150 high = nrow; 1151 for (l=0; l<n; l++) { /* loop over added columns */ 1152 col = in[l]; 1153 if (col <= lastcol) low = 0; else high = nrow; 1154 lastcol = col; 1155 value = v + k*(stepval+4 + l)*4; 1156 while (high-low > 7) { 1157 t = (low+high)/2; 1158 if (rp[t] > col) high = t; 1159 else low = t; 1160 } 1161 for (i=low; i<high; i++) { 1162 if (rp[i] > col) break; 1163 if (rp[i] == col) { 1164 bap = ap + 16*i; 1165 for (ii=0; ii<4; ii++,value+=stepval) { 1166 for (jj=ii; jj<16; jj+=4) { 1167 bap[jj] += *value++; 1168 } 1169 } 1170 goto noinsert2; 1171 } 1172 } 1173 N = nrow++ - 1; 1174 high++; /* added new column index thus must search to one higher than before */ 1175 /* shift up all the later entries in this row */ 1176 for (ii=N; ii>=i; ii--) { 1177 rp[ii+1] = rp[ii]; 1178 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1179 } 1180 if (N >= i) { 1181 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1182 } 1183 rp[i] = col; 1184 bap = ap + 16*i; 1185 for (ii=0; ii<4; ii++,value+=stepval) { 1186 for (jj=ii; jj<16; jj+=4) { 1187 bap[jj] = *value++; 1188 } 1189 } 1190 noinsert2:; 1191 low = i; 1192 } 1193 ailen[row] = nrow; 1194 } 1195 PetscFunctionReturnVoid(); 1196 } 1197 EXTERN_C_END 1198 1199 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1200 #define matsetvalues4_ MATSETVALUES4 1201 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1202 #define matsetvalues4_ matsetvalues4 1203 #endif 1204 1205 EXTERN_C_BEGIN 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatSetValues4_" 1208 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1209 { 1210 Mat A = *AA; 1211 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1212 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1213 PetscInt *ai=a->i,*ailen=a->ilen; 1214 PetscInt *aj=a->j,brow,bcol; 1215 PetscInt ridx,cidx,lastcol = -1; 1216 MatScalar *ap,value,*aa=a->a,*bap; 1217 1218 PetscFunctionBegin; 1219 for (k=0; k<m; k++) { /* loop over added rows */ 1220 row = im[k]; brow = row/4; 1221 rp = aj + ai[brow]; 1222 ap = aa + 16*ai[brow]; 1223 nrow = ailen[brow]; 1224 low = 0; 1225 high = nrow; 1226 for (l=0; l<n; l++) { /* loop over added columns */ 1227 col = in[l]; bcol = col/4; 1228 ridx = row % 4; cidx = col % 4; 1229 value = v[l + k*n]; 1230 if (col <= lastcol) low = 0; else high = nrow; 1231 lastcol = col; 1232 while (high-low > 7) { 1233 t = (low+high)/2; 1234 if (rp[t] > bcol) high = t; 1235 else low = t; 1236 } 1237 for (i=low; i<high; i++) { 1238 if (rp[i] > bcol) break; 1239 if (rp[i] == bcol) { 1240 bap = ap + 16*i + 4*cidx + ridx; 1241 *bap += value; 1242 goto noinsert1; 1243 } 1244 } 1245 N = nrow++ - 1; 1246 high++; /* added new column thus must search to one higher than before */ 1247 /* shift up all the later entries in this row */ 1248 for (ii=N; ii>=i; ii--) { 1249 rp[ii+1] = rp[ii]; 1250 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1251 } 1252 if (N>=i) { 1253 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1254 } 1255 rp[i] = bcol; 1256 ap[16*i + 4*cidx + ridx] = value; 1257 noinsert1:; 1258 low = i; 1259 } 1260 ailen[brow] = nrow; 1261 } 1262 PetscFunctionReturnVoid(); 1263 } 1264 EXTERN_C_END 1265 1266 /* 1267 Checks for missing diagonals 1268 */ 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1271 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1272 { 1273 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1274 PetscErrorCode ierr; 1275 PetscInt *diag,*jj = a->j,i; 1276 1277 PetscFunctionBegin; 1278 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1279 *missing = PETSC_FALSE; 1280 if (A->rmap->n > 0 && !jj) { 1281 *missing = PETSC_TRUE; 1282 if (d) *d = 0; 1283 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1284 } else { 1285 diag = a->diag; 1286 for (i=0; i<a->mbs; i++) { 1287 if (jj[diag[i]] != i) { 1288 *missing = PETSC_TRUE; 1289 if (d) *d = i; 1290 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1291 } 1292 } 1293 } 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1299 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1300 { 1301 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1302 PetscErrorCode ierr; 1303 PetscInt i,j,m = a->mbs; 1304 1305 PetscFunctionBegin; 1306 if (!a->diag) { 1307 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1308 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1309 a->free_diag = PETSC_TRUE; 1310 } 1311 for (i=0; i<m; i++) { 1312 a->diag[i] = a->i[i+1]; 1313 for (j=a->i[i]; j<a->i[i+1]; j++) { 1314 if (a->j[j] == i) { 1315 a->diag[i] = j; 1316 break; 1317 } 1318 } 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 1324 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1328 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1329 { 1330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1333 PetscInt *tia, *tja; 1334 1335 PetscFunctionBegin; 1336 *nn = n; 1337 if (!ia) PetscFunctionReturn(0); 1338 if (symmetric) { 1339 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1340 nz = tia[n]; 1341 } else { 1342 tia = a->i; tja = a->j; 1343 } 1344 1345 if (!blockcompressed && bs > 1) { 1346 (*nn) *= bs; 1347 /* malloc & create the natural set of indices */ 1348 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1349 if (n) { 1350 (*ia)[0] = 0; 1351 for (j=1; j<bs; j++) { 1352 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1353 } 1354 } 1355 1356 for (i=1; i<n; i++) { 1357 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1358 for (j=1; j<bs; j++) { 1359 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1360 } 1361 } 1362 if (n) { 1363 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1364 } 1365 1366 if (ja) { 1367 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1368 cnt = 0; 1369 for (i=0; i<n; i++) { 1370 for (j=0; j<bs; j++) { 1371 for (k=tia[i]; k<tia[i+1]; k++) { 1372 for (l=0; l<bs; l++) { 1373 (*ja)[cnt++] = bs*tja[k] + l; 1374 } 1375 } 1376 } 1377 } 1378 } 1379 1380 n *= bs; 1381 nz *= bs*bs; 1382 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1383 ierr = PetscFree(tia);CHKERRQ(ierr); 1384 ierr = PetscFree(tja);CHKERRQ(ierr); 1385 } 1386 } else if (oshift == 1) { 1387 if (symmetric) { 1388 PetscInt nz = tia[A->rmap->n/bs]; 1389 /* add 1 to i and j indices */ 1390 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1391 *ia = tia; 1392 if (ja) { 1393 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1394 *ja = tja; 1395 } 1396 } else { 1397 PetscInt nz = a->i[A->rmap->n/bs]; 1398 /* malloc space and add 1 to i and j indices */ 1399 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1400 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1401 if (ja) { 1402 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1403 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1404 } 1405 } 1406 } else { 1407 *ia = tia; 1408 if (ja) *ja = tja; 1409 } 1410 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1416 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1417 { 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 if (!ia) PetscFunctionReturn(0); 1422 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1423 ierr = PetscFree(*ia);CHKERRQ(ierr); 1424 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1431 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1432 { 1433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1434 PetscErrorCode ierr; 1435 1436 PetscFunctionBegin; 1437 #if defined(PETSC_USE_LOG) 1438 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1439 #endif 1440 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1441 if (a->row) { 1442 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1443 } 1444 if (a->col) { 1445 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1446 } 1447 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1448 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1449 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1450 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1451 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1452 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1453 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1454 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1455 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1456 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1457 1458 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1459 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1460 ierr = PetscFree(a);CHKERRQ(ierr); 1461 1462 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1466 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1467 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1468 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1469 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1470 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1476 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1477 { 1478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1479 PetscErrorCode ierr; 1480 1481 PetscFunctionBegin; 1482 switch (op) { 1483 case MAT_ROW_ORIENTED: 1484 a->roworiented = flg; 1485 break; 1486 case MAT_KEEP_NONZERO_PATTERN: 1487 a->keepnonzeropattern = flg; 1488 break; 1489 case MAT_NEW_NONZERO_LOCATIONS: 1490 a->nonew = (flg ? 0 : 1); 1491 break; 1492 case MAT_NEW_NONZERO_LOCATION_ERR: 1493 a->nonew = (flg ? -1 : 0); 1494 break; 1495 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1496 a->nonew = (flg ? -2 : 0); 1497 break; 1498 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1499 a->nounused = (flg ? -1 : 0); 1500 break; 1501 case MAT_NEW_DIAGONALS: 1502 case MAT_IGNORE_OFF_PROC_ENTRIES: 1503 case MAT_USE_HASH_TABLE: 1504 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1505 break; 1506 case MAT_SYMMETRIC: 1507 case MAT_STRUCTURALLY_SYMMETRIC: 1508 case MAT_HERMITIAN: 1509 case MAT_SYMMETRY_ETERNAL: 1510 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1511 break; 1512 default: 1513 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1525 MatScalar *aa,*aa_i; 1526 PetscScalar *v_i; 1527 1528 PetscFunctionBegin; 1529 bs = A->rmap->bs; 1530 ai = a->i; 1531 aj = a->j; 1532 aa = a->a; 1533 bs2 = a->bs2; 1534 1535 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1536 1537 bn = row/bs; /* Block number */ 1538 bp = row % bs; /* Block Position */ 1539 M = ai[bn+1] - ai[bn]; 1540 *nz = bs*M; 1541 1542 if (v) { 1543 *v = 0; 1544 if (*nz) { 1545 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1546 for (i=0; i<M; i++) { /* for each block in the block row */ 1547 v_i = *v + i*bs; 1548 aa_i = aa + bs2*(ai[bn] + i); 1549 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1550 } 1551 } 1552 } 1553 1554 if (idx) { 1555 *idx = 0; 1556 if (*nz) { 1557 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1558 for (i=0; i<M; i++) { /* for each block in the block row */ 1559 idx_i = *idx + i*bs; 1560 itmp = bs*aj[ai[bn] + i]; 1561 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1562 } 1563 } 1564 } 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1571 { 1572 PetscErrorCode ierr; 1573 1574 PetscFunctionBegin; 1575 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1576 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1577 PetscFunctionReturn(0); 1578 } 1579 1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1585 { 1586 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1587 Mat C; 1588 PetscErrorCode ierr; 1589 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1590 PetscInt *rows,*cols,bs2=a->bs2; 1591 MatScalar *array; 1592 1593 PetscFunctionBegin; 1594 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1595 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1596 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1597 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1598 1599 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1600 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1601 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1602 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1603 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1604 ierr = PetscFree(col);CHKERRQ(ierr); 1605 } else { 1606 C = *B; 1607 } 1608 1609 array = a->a; 1610 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1611 for (i=0; i<mbs; i++) { 1612 cols[0] = i*bs; 1613 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1614 len = ai[i+1] - ai[i]; 1615 for (j=0; j<len; j++) { 1616 rows[0] = (*aj++)*bs; 1617 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1618 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1619 array += bs2; 1620 } 1621 } 1622 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1623 1624 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1625 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1626 1627 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1628 *B = C; 1629 } else { 1630 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1631 } 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1637 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1638 { 1639 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1640 PetscErrorCode ierr; 1641 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1642 int fd; 1643 PetscScalar *aa; 1644 FILE *file; 1645 1646 PetscFunctionBegin; 1647 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1648 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1649 col_lens[0] = MAT_FILE_CLASSID; 1650 1651 col_lens[1] = A->rmap->N; 1652 col_lens[2] = A->cmap->n; 1653 col_lens[3] = a->nz*bs2; 1654 1655 /* store lengths of each row and write (including header) to file */ 1656 count = 0; 1657 for (i=0; i<a->mbs; i++) { 1658 for (j=0; j<bs; j++) { 1659 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1660 } 1661 } 1662 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1663 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1664 1665 /* store column indices (zero start index) */ 1666 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1667 count = 0; 1668 for (i=0; i<a->mbs; i++) { 1669 for (j=0; j<bs; j++) { 1670 for (k=a->i[i]; k<a->i[i+1]; k++) { 1671 for (l=0; l<bs; l++) { 1672 jj[count++] = bs*a->j[k] + l; 1673 } 1674 } 1675 } 1676 } 1677 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1678 ierr = PetscFree(jj);CHKERRQ(ierr); 1679 1680 /* store nonzero values */ 1681 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1682 count = 0; 1683 for (i=0; i<a->mbs; i++) { 1684 for (j=0; j<bs; j++) { 1685 for (k=a->i[i]; k<a->i[i+1]; k++) { 1686 for (l=0; l<bs; l++) { 1687 aa[count++] = a->a[bs2*k + l*bs + j]; 1688 } 1689 } 1690 } 1691 } 1692 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1693 ierr = PetscFree(aa);CHKERRQ(ierr); 1694 1695 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1696 if (file) { 1697 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1698 } 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1704 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1705 { 1706 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1707 PetscErrorCode ierr; 1708 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1709 PetscViewerFormat format; 1710 1711 PetscFunctionBegin; 1712 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1713 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1714 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1715 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1716 Mat aij; 1717 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1718 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1719 ierr = MatDestroy(aij);CHKERRQ(ierr); 1720 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1721 PetscFunctionReturn(0); 1722 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1723 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1724 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1725 for (i=0; i<a->mbs; i++) { 1726 for (j=0; j<bs; j++) { 1727 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1728 for (k=a->i[i]; k<a->i[i+1]; k++) { 1729 for (l=0; l<bs; l++) { 1730 #if defined(PETSC_USE_COMPLEX) 1731 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1732 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1733 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1734 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1735 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1736 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1737 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1738 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1739 } 1740 #else 1741 if (a->a[bs2*k + l*bs + j] != 0.0) { 1742 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1743 } 1744 #endif 1745 } 1746 } 1747 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1748 } 1749 } 1750 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1751 } else { 1752 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1753 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1754 for (i=0; i<a->mbs; i++) { 1755 for (j=0; j<bs; j++) { 1756 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1757 for (k=a->i[i]; k<a->i[i+1]; k++) { 1758 for (l=0; l<bs; l++) { 1759 #if defined(PETSC_USE_COMPLEX) 1760 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1761 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1762 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1763 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1764 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1765 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1766 } else { 1767 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1768 } 1769 #else 1770 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1771 #endif 1772 } 1773 } 1774 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1775 } 1776 } 1777 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1778 } 1779 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1785 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1786 { 1787 Mat A = (Mat) Aa; 1788 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1791 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1792 MatScalar *aa; 1793 PetscViewer viewer; 1794 1795 PetscFunctionBegin; 1796 1797 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1798 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1799 1800 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1801 1802 /* loop over matrix elements drawing boxes */ 1803 color = PETSC_DRAW_BLUE; 1804 for (i=0,row=0; i<mbs; i++,row+=bs) { 1805 for (j=a->i[i]; j<a->i[i+1]; j++) { 1806 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1807 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1808 aa = a->a + j*bs2; 1809 for (k=0; k<bs; k++) { 1810 for (l=0; l<bs; l++) { 1811 if (PetscRealPart(*aa++) >= 0.) continue; 1812 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1813 } 1814 } 1815 } 1816 } 1817 color = PETSC_DRAW_CYAN; 1818 for (i=0,row=0; i<mbs; i++,row+=bs) { 1819 for (j=a->i[i]; j<a->i[i+1]; j++) { 1820 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1821 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1822 aa = a->a + j*bs2; 1823 for (k=0; k<bs; k++) { 1824 for (l=0; l<bs; l++) { 1825 if (PetscRealPart(*aa++) != 0.) continue; 1826 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1827 } 1828 } 1829 } 1830 } 1831 1832 color = PETSC_DRAW_RED; 1833 for (i=0,row=0; i<mbs; i++,row+=bs) { 1834 for (j=a->i[i]; j<a->i[i+1]; j++) { 1835 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1836 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1837 aa = a->a + j*bs2; 1838 for (k=0; k<bs; k++) { 1839 for (l=0; l<bs; l++) { 1840 if (PetscRealPart(*aa++) <= 0.) continue; 1841 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1842 } 1843 } 1844 } 1845 } 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1851 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1852 { 1853 PetscErrorCode ierr; 1854 PetscReal xl,yl,xr,yr,w,h; 1855 PetscDraw draw; 1856 PetscBool isnull; 1857 1858 PetscFunctionBegin; 1859 1860 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1861 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1862 1863 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1864 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1865 xr += w; yr += h; xl = -w; yl = -h; 1866 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1867 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1868 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatView_SeqBAIJ" 1874 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1875 { 1876 PetscErrorCode ierr; 1877 PetscBool iascii,isbinary,isdraw; 1878 1879 PetscFunctionBegin; 1880 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1881 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1882 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1883 if (iascii){ 1884 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1885 } else if (isbinary) { 1886 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1887 } else if (isdraw) { 1888 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1889 } else { 1890 Mat B; 1891 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1892 ierr = MatView(B,viewer);CHKERRQ(ierr); 1893 ierr = MatDestroy(B);CHKERRQ(ierr); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1901 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1902 { 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1904 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1905 PetscInt *ai = a->i,*ailen = a->ilen; 1906 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1907 MatScalar *ap,*aa = a->a; 1908 1909 PetscFunctionBegin; 1910 for (k=0; k<m; k++) { /* loop over rows */ 1911 row = im[k]; brow = row/bs; 1912 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1913 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1914 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1915 nrow = ailen[brow]; 1916 for (l=0; l<n; l++) { /* loop over columns */ 1917 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1918 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1919 col = in[l] ; 1920 bcol = col/bs; 1921 cidx = col%bs; 1922 ridx = row%bs; 1923 high = nrow; 1924 low = 0; /* assume unsorted */ 1925 while (high-low > 5) { 1926 t = (low+high)/2; 1927 if (rp[t] > bcol) high = t; 1928 else low = t; 1929 } 1930 for (i=low; i<high; i++) { 1931 if (rp[i] > bcol) break; 1932 if (rp[i] == bcol) { 1933 *v++ = ap[bs2*i+bs*cidx+ridx]; 1934 goto finished; 1935 } 1936 } 1937 *v++ = 0.0; 1938 finished:; 1939 } 1940 } 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1946 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1947 { 1948 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1949 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1950 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1951 PetscErrorCode ierr; 1952 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1953 PetscBool roworiented=a->roworiented; 1954 const PetscScalar *value = v; 1955 MatScalar *ap,*aa = a->a,*bap; 1956 1957 PetscFunctionBegin; 1958 if (roworiented) { 1959 stepval = (n-1)*bs; 1960 } else { 1961 stepval = (m-1)*bs; 1962 } 1963 for (k=0; k<m; k++) { /* loop over added rows */ 1964 row = im[k]; 1965 if (row < 0) continue; 1966 #if defined(PETSC_USE_DEBUG) 1967 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1968 #endif 1969 rp = aj + ai[row]; 1970 ap = aa + bs2*ai[row]; 1971 rmax = imax[row]; 1972 nrow = ailen[row]; 1973 low = 0; 1974 high = nrow; 1975 for (l=0; l<n; l++) { /* loop over added columns */ 1976 if (in[l] < 0) continue; 1977 #if defined(PETSC_USE_DEBUG) 1978 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1979 #endif 1980 col = in[l]; 1981 if (roworiented) { 1982 value = v + (k*(stepval+bs) + l)*bs; 1983 } else { 1984 value = v + (l*(stepval+bs) + k)*bs; 1985 } 1986 if (col <= lastcol) low = 0; else high = nrow; 1987 lastcol = col; 1988 while (high-low > 7) { 1989 t = (low+high)/2; 1990 if (rp[t] > col) high = t; 1991 else low = t; 1992 } 1993 for (i=low; i<high; i++) { 1994 if (rp[i] > col) break; 1995 if (rp[i] == col) { 1996 bap = ap + bs2*i; 1997 if (roworiented) { 1998 if (is == ADD_VALUES) { 1999 for (ii=0; ii<bs; ii++,value+=stepval) { 2000 for (jj=ii; jj<bs2; jj+=bs) { 2001 bap[jj] += *value++; 2002 } 2003 } 2004 } else { 2005 for (ii=0; ii<bs; ii++,value+=stepval) { 2006 for (jj=ii; jj<bs2; jj+=bs) { 2007 bap[jj] = *value++; 2008 } 2009 } 2010 } 2011 } else { 2012 if (is == ADD_VALUES) { 2013 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2014 for (jj=0; jj<bs; jj++) { 2015 bap[jj] += value[jj]; 2016 } 2017 bap += bs; 2018 } 2019 } else { 2020 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2021 for (jj=0; jj<bs; jj++) { 2022 bap[jj] = value[jj]; 2023 } 2024 bap += bs; 2025 } 2026 } 2027 } 2028 goto noinsert2; 2029 } 2030 } 2031 if (nonew == 1) goto noinsert2; 2032 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2033 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2034 N = nrow++ - 1; high++; 2035 /* shift up all the later entries in this row */ 2036 for (ii=N; ii>=i; ii--) { 2037 rp[ii+1] = rp[ii]; 2038 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2039 } 2040 if (N >= i) { 2041 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2042 } 2043 rp[i] = col; 2044 bap = ap + bs2*i; 2045 if (roworiented) { 2046 for (ii=0; ii<bs; ii++,value+=stepval) { 2047 for (jj=ii; jj<bs2; jj+=bs) { 2048 bap[jj] = *value++; 2049 } 2050 } 2051 } else { 2052 for (ii=0; ii<bs; ii++,value+=stepval) { 2053 for (jj=0; jj<bs; jj++) { 2054 *bap++ = *value++; 2055 } 2056 } 2057 } 2058 noinsert2:; 2059 low = i; 2060 } 2061 ailen[row] = nrow; 2062 } 2063 PetscFunctionReturn(0); 2064 } 2065 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2068 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2069 { 2070 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2071 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2072 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2073 PetscErrorCode ierr; 2074 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2075 MatScalar *aa = a->a,*ap; 2076 PetscReal ratio=0.6; 2077 2078 PetscFunctionBegin; 2079 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2080 2081 if (m) rmax = ailen[0]; 2082 for (i=1; i<mbs; i++) { 2083 /* move each row back by the amount of empty slots (fshift) before it*/ 2084 fshift += imax[i-1] - ailen[i-1]; 2085 rmax = PetscMax(rmax,ailen[i]); 2086 if (fshift) { 2087 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2088 N = ailen[i]; 2089 for (j=0; j<N; j++) { 2090 ip[j-fshift] = ip[j]; 2091 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2092 } 2093 } 2094 ai[i] = ai[i-1] + ailen[i-1]; 2095 } 2096 if (mbs) { 2097 fshift += imax[mbs-1] - ailen[mbs-1]; 2098 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2099 } 2100 /* reset ilen and imax for each row */ 2101 for (i=0; i<mbs; i++) { 2102 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2103 } 2104 a->nz = ai[mbs]; 2105 2106 /* diagonals may have moved, so kill the diagonal pointers */ 2107 a->idiagvalid = PETSC_FALSE; 2108 if (fshift && a->diag) { 2109 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2110 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2111 a->diag = 0; 2112 } 2113 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2114 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2115 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2116 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2117 A->info.mallocs += a->reallocs; 2118 a->reallocs = 0; 2119 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2120 2121 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 2122 if (a->compressedrow.use){ 2123 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2124 } 2125 2126 A->same_nonzero = PETSC_TRUE; 2127 PetscFunctionReturn(0); 2128 } 2129 2130 /* 2131 This function returns an array of flags which indicate the locations of contiguous 2132 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2133 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2134 Assume: sizes should be long enough to hold all the values. 2135 */ 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2138 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2139 { 2140 PetscInt i,j,k,row; 2141 PetscBool flg; 2142 2143 PetscFunctionBegin; 2144 for (i=0,j=0; i<n; j++) { 2145 row = idx[i]; 2146 if (row%bs!=0) { /* Not the begining of a block */ 2147 sizes[j] = 1; 2148 i++; 2149 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2150 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2151 i++; 2152 } else { /* Begining of the block, so check if the complete block exists */ 2153 flg = PETSC_TRUE; 2154 for (k=1; k<bs; k++) { 2155 if (row+k != idx[i+k]) { /* break in the block */ 2156 flg = PETSC_FALSE; 2157 break; 2158 } 2159 } 2160 if (flg) { /* No break in the bs */ 2161 sizes[j] = bs; 2162 i+= bs; 2163 } else { 2164 sizes[j] = 1; 2165 i++; 2166 } 2167 } 2168 } 2169 *bs_max = j; 2170 PetscFunctionReturn(0); 2171 } 2172 2173 #undef __FUNCT__ 2174 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2175 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2176 { 2177 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2178 PetscErrorCode ierr; 2179 PetscInt i,j,k,count,*rows; 2180 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2181 PetscScalar zero = 0.0; 2182 MatScalar *aa; 2183 const PetscScalar *xx; 2184 PetscScalar *bb; 2185 2186 PetscFunctionBegin; 2187 /* fix right hand side if needed */ 2188 if (x && b) { 2189 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2190 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2191 for (i=0; i<is_n; i++) { 2192 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2193 } 2194 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2195 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2196 } 2197 2198 /* Make a copy of the IS and sort it */ 2199 /* allocate memory for rows,sizes */ 2200 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2201 2202 /* copy IS values to rows, and sort them */ 2203 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2204 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2205 2206 if (baij->keepnonzeropattern) { 2207 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2208 bs_max = is_n; 2209 A->same_nonzero = PETSC_TRUE; 2210 } else { 2211 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2212 A->same_nonzero = PETSC_FALSE; 2213 } 2214 2215 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2216 row = rows[j]; 2217 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2218 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2219 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2220 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2221 if (diag != 0.0) { 2222 if (baij->ilen[row/bs] > 0) { 2223 baij->ilen[row/bs] = 1; 2224 baij->j[baij->i[row/bs]] = row/bs; 2225 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2226 } 2227 /* Now insert all the diagonal values for this bs */ 2228 for (k=0; k<bs; k++) { 2229 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2230 } 2231 } else { /* (diag == 0.0) */ 2232 baij->ilen[row/bs] = 0; 2233 } /* end (diag == 0.0) */ 2234 } else { /* (sizes[i] != bs) */ 2235 #if defined (PETSC_USE_DEBUG) 2236 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2237 #endif 2238 for (k=0; k<count; k++) { 2239 aa[0] = zero; 2240 aa += bs; 2241 } 2242 if (diag != 0.0) { 2243 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2244 } 2245 } 2246 } 2247 2248 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2249 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2255 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2256 { 2257 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2258 PetscErrorCode ierr; 2259 PetscInt i,j,k,count; 2260 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2261 PetscScalar zero = 0.0; 2262 MatScalar *aa; 2263 const PetscScalar *xx; 2264 PetscScalar *bb; 2265 PetscBool *zeroed,vecs = PETSC_FALSE; 2266 2267 PetscFunctionBegin; 2268 /* fix right hand side if needed */ 2269 if (x && b) { 2270 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2271 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2272 vecs = PETSC_TRUE; 2273 } 2274 A->same_nonzero = PETSC_TRUE; 2275 2276 /* zero the columns */ 2277 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2278 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2279 for (i=0; i<is_n; i++) { 2280 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2281 zeroed[is_idx[i]] = PETSC_TRUE; 2282 } 2283 for (i=0; i<A->rmap->N; i++) { 2284 if (!zeroed[i]) { 2285 row = i/bs; 2286 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2287 for (k=0; k<bs; k++) { 2288 col = bs*baij->j[j] + k; 2289 if (zeroed[col]) { 2290 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2291 if (vecs) bb[i] -= aa[0]*xx[col]; 2292 aa[0] = 0.0; 2293 } 2294 } 2295 } 2296 } else if (vecs) bb[i] = diag*xx[i]; 2297 } 2298 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2299 if (vecs) { 2300 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2301 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2302 } 2303 2304 /* zero the rows */ 2305 for (i=0; i<is_n; i++) { 2306 row = is_idx[i]; 2307 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2308 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2309 for (k=0; k<count; k++) { 2310 aa[0] = zero; 2311 aa += bs; 2312 } 2313 if (diag != 0.0) { 2314 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2315 } 2316 } 2317 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2323 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2324 { 2325 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2326 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2327 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2328 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2329 PetscErrorCode ierr; 2330 PetscInt ridx,cidx,bs2=a->bs2; 2331 PetscBool roworiented=a->roworiented; 2332 MatScalar *ap,value,*aa=a->a,*bap; 2333 2334 PetscFunctionBegin; 2335 if (v) PetscValidScalarPointer(v,6); 2336 for (k=0; k<m; k++) { /* loop over added rows */ 2337 row = im[k]; 2338 brow = row/bs; 2339 if (row < 0) continue; 2340 #if defined(PETSC_USE_DEBUG) 2341 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2342 #endif 2343 rp = aj + ai[brow]; 2344 ap = aa + bs2*ai[brow]; 2345 rmax = imax[brow]; 2346 nrow = ailen[brow]; 2347 low = 0; 2348 high = nrow; 2349 for (l=0; l<n; l++) { /* loop over added columns */ 2350 if (in[l] < 0) continue; 2351 #if defined(PETSC_USE_DEBUG) 2352 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2353 #endif 2354 col = in[l]; bcol = col/bs; 2355 ridx = row % bs; cidx = col % bs; 2356 if (roworiented) { 2357 value = v[l + k*n]; 2358 } else { 2359 value = v[k + l*m]; 2360 } 2361 if (col <= lastcol) low = 0; else high = nrow; 2362 lastcol = col; 2363 while (high-low > 7) { 2364 t = (low+high)/2; 2365 if (rp[t] > bcol) high = t; 2366 else low = t; 2367 } 2368 for (i=low; i<high; i++) { 2369 if (rp[i] > bcol) break; 2370 if (rp[i] == bcol) { 2371 bap = ap + bs2*i + bs*cidx + ridx; 2372 if (is == ADD_VALUES) *bap += value; 2373 else *bap = value; 2374 goto noinsert1; 2375 } 2376 } 2377 if (nonew == 1) goto noinsert1; 2378 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2379 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2380 N = nrow++ - 1; high++; 2381 /* shift up all the later entries in this row */ 2382 for (ii=N; ii>=i; ii--) { 2383 rp[ii+1] = rp[ii]; 2384 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2385 } 2386 if (N>=i) { 2387 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2388 } 2389 rp[i] = bcol; 2390 ap[bs2*i + bs*cidx + ridx] = value; 2391 a->nz++; 2392 noinsert1:; 2393 low = i; 2394 } 2395 ailen[brow] = nrow; 2396 } 2397 A->same_nonzero = PETSC_FALSE; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2403 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2404 { 2405 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2406 Mat outA; 2407 PetscErrorCode ierr; 2408 PetscBool row_identity,col_identity; 2409 2410 PetscFunctionBegin; 2411 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2412 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2413 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2414 if (!row_identity || !col_identity) { 2415 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2416 } 2417 2418 outA = inA; 2419 inA->factortype = MAT_FACTOR_LU; 2420 2421 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2422 2423 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2424 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2425 a->row = row; 2426 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2427 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2428 a->col = col; 2429 2430 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2431 if (a->icol) { 2432 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2433 } 2434 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2435 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2436 2437 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2438 if (!a->solve_work) { 2439 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2440 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2441 } 2442 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2443 2444 PetscFunctionReturn(0); 2445 } 2446 2447 EXTERN_C_BEGIN 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2450 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2451 { 2452 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2453 PetscInt i,nz,mbs; 2454 2455 PetscFunctionBegin; 2456 nz = baij->maxnz; 2457 mbs = baij->mbs; 2458 for (i=0; i<nz; i++) { 2459 baij->j[i] = indices[i]; 2460 } 2461 baij->nz = nz; 2462 for (i=0; i<mbs; i++) { 2463 baij->ilen[i] = baij->imax[i]; 2464 } 2465 PetscFunctionReturn(0); 2466 } 2467 EXTERN_C_END 2468 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2471 /*@ 2472 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2473 in the matrix. 2474 2475 Input Parameters: 2476 + mat - the SeqBAIJ matrix 2477 - indices - the column indices 2478 2479 Level: advanced 2480 2481 Notes: 2482 This can be called if you have precomputed the nonzero structure of the 2483 matrix and want to provide it to the matrix object to improve the performance 2484 of the MatSetValues() operation. 2485 2486 You MUST have set the correct numbers of nonzeros per row in the call to 2487 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2488 2489 MUST be called before any calls to MatSetValues(); 2490 2491 @*/ 2492 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2493 { 2494 PetscErrorCode ierr; 2495 2496 PetscFunctionBegin; 2497 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2498 PetscValidPointer(indices,2); 2499 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2505 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2506 { 2507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2508 PetscErrorCode ierr; 2509 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2510 PetscReal atmp; 2511 PetscScalar *x,zero = 0.0; 2512 MatScalar *aa; 2513 PetscInt ncols,brow,krow,kcol; 2514 2515 PetscFunctionBegin; 2516 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2517 bs = A->rmap->bs; 2518 aa = a->a; 2519 ai = a->i; 2520 aj = a->j; 2521 mbs = a->mbs; 2522 2523 ierr = VecSet(v,zero);CHKERRQ(ierr); 2524 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2525 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2526 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2527 for (i=0; i<mbs; i++) { 2528 ncols = ai[1] - ai[0]; ai++; 2529 brow = bs*i; 2530 for (j=0; j<ncols; j++){ 2531 for (kcol=0; kcol<bs; kcol++){ 2532 for (krow=0; krow<bs; krow++){ 2533 atmp = PetscAbsScalar(*aa);aa++; 2534 row = brow + krow; /* row index */ 2535 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2536 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2537 } 2538 } 2539 aj++; 2540 } 2541 } 2542 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 #undef __FUNCT__ 2547 #define __FUNCT__ "MatCopy_SeqBAIJ" 2548 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2549 { 2550 PetscErrorCode ierr; 2551 2552 PetscFunctionBegin; 2553 /* If the two matrices have the same copy implementation, use fast copy. */ 2554 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2556 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2557 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2558 2559 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2560 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2561 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2562 } else { 2563 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2564 } 2565 PetscFunctionReturn(0); 2566 } 2567 2568 #undef __FUNCT__ 2569 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2570 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2571 { 2572 PetscErrorCode ierr; 2573 2574 PetscFunctionBegin; 2575 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2581 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2582 { 2583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2584 PetscFunctionBegin; 2585 *array = a->a; 2586 PetscFunctionReturn(0); 2587 } 2588 2589 #undef __FUNCT__ 2590 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2591 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2592 { 2593 PetscFunctionBegin; 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2599 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2600 { 2601 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2602 PetscErrorCode ierr; 2603 PetscInt i,bs=Y->rmap->bs,j,bs2; 2604 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2605 2606 PetscFunctionBegin; 2607 if (str == SAME_NONZERO_PATTERN) { 2608 PetscScalar alpha = a; 2609 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2610 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2611 if (y->xtoy && y->XtoY != X) { 2612 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2613 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2614 } 2615 if (!y->xtoy) { /* get xtoy */ 2616 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2617 y->XtoY = X; 2618 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2619 } 2620 bs2 = bs*bs; 2621 for (i=0; i<x->nz; i++) { 2622 j = 0; 2623 while (j < bs2){ 2624 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2625 j++; 2626 } 2627 } 2628 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2629 } else { 2630 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2631 } 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2637 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2638 { 2639 PetscInt rbs,cbs; 2640 PetscErrorCode ierr; 2641 2642 PetscFunctionBegin; 2643 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2644 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2645 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2646 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2647 PetscFunctionReturn(0); 2648 } 2649 2650 #undef __FUNCT__ 2651 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2652 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2653 { 2654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2655 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2656 MatScalar *aa = a->a; 2657 2658 PetscFunctionBegin; 2659 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2660 PetscFunctionReturn(0); 2661 } 2662 2663 #undef __FUNCT__ 2664 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2665 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2666 { 2667 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2668 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2669 MatScalar *aa = a->a; 2670 2671 PetscFunctionBegin; 2672 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2673 PetscFunctionReturn(0); 2674 } 2675 2676 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2677 2678 #undef __FUNCT__ 2679 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2680 /* 2681 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2682 */ 2683 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2684 { 2685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2686 PetscErrorCode ierr; 2687 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2688 PetscInt nz = a->i[m],row,*jj,mr,col; 2689 2690 PetscFunctionBegin; 2691 *nn = n; 2692 if (!ia) PetscFunctionReturn(0); 2693 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2694 else { 2695 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2696 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2697 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2698 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2699 jj = a->j; 2700 for (i=0; i<nz; i++) { 2701 collengths[jj[i]]++; 2702 } 2703 cia[0] = oshift; 2704 for (i=0; i<n; i++) { 2705 cia[i+1] = cia[i] + collengths[i]; 2706 } 2707 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2708 jj = a->j; 2709 for (row=0; row<m; row++) { 2710 mr = a->i[row+1] - a->i[row]; 2711 for (i=0; i<mr; i++) { 2712 col = *jj++; 2713 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2714 } 2715 } 2716 ierr = PetscFree(collengths);CHKERRQ(ierr); 2717 *ia = cia; *ja = cja; 2718 } 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2724 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2725 { 2726 PetscErrorCode ierr; 2727 2728 PetscFunctionBegin; 2729 if (!ia) PetscFunctionReturn(0); 2730 ierr = PetscFree(*ia);CHKERRQ(ierr); 2731 ierr = PetscFree(*ja);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 2735 #undef __FUNCT__ 2736 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2737 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2738 { 2739 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2740 PetscErrorCode ierr; 2741 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2742 PetscScalar dx,*y,*xx,*w3_array; 2743 PetscScalar *vscale_array; 2744 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2745 Vec w1=coloring->w1,w2=coloring->w2,w3; 2746 void *fctx = coloring->fctx; 2747 PetscBool flg = PETSC_FALSE; 2748 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2749 Vec x1_tmp; 2750 2751 PetscFunctionBegin; 2752 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2753 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2754 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2755 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2756 2757 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2758 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2759 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2760 if (flg) { 2761 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2762 } else { 2763 PetscBool assembled; 2764 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2765 if (assembled) { 2766 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2767 } 2768 } 2769 2770 x1_tmp = x1; 2771 if (!coloring->vscale){ 2772 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2773 } 2774 2775 /* 2776 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2777 coloring->F for the coarser grids from the finest 2778 */ 2779 if (coloring->F) { 2780 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2781 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2782 if (m1 != m2) { 2783 coloring->F = 0; 2784 } 2785 } 2786 2787 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2788 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2789 } 2790 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2791 2792 /* Set w1 = F(x1) */ 2793 if (coloring->F) { 2794 w1 = coloring->F; /* use already computed value of function */ 2795 coloring->F = 0; 2796 } else { 2797 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2798 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2799 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2800 } 2801 2802 if (!coloring->w3) { 2803 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2804 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2805 } 2806 w3 = coloring->w3; 2807 2808 CHKMEMQ; 2809 /* Compute all the local scale factors, including ghost points */ 2810 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2811 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2812 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2813 if (ctype == IS_COLORING_GHOSTED){ 2814 col_start = 0; col_end = N; 2815 } else if (ctype == IS_COLORING_GLOBAL){ 2816 xx = xx - start; 2817 vscale_array = vscale_array - start; 2818 col_start = start; col_end = N + start; 2819 } CHKMEMQ; 2820 for (col=col_start; col<col_end; col++){ 2821 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2822 if (coloring->htype[0] == 'w') { 2823 dx = 1.0 + unorm; 2824 } else { 2825 dx = xx[col]; 2826 } 2827 if (dx == 0.0) dx = 1.0; 2828 #if !defined(PETSC_USE_COMPLEX) 2829 if (dx < umin && dx >= 0.0) dx = umin; 2830 else if (dx < 0.0 && dx > -umin) dx = -umin; 2831 #else 2832 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2833 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2834 #endif 2835 dx *= epsilon; 2836 vscale_array[col] = 1.0/dx; 2837 } CHKMEMQ; 2838 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2839 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2840 if (ctype == IS_COLORING_GLOBAL){ 2841 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2843 } 2844 CHKMEMQ; 2845 if (coloring->vscaleforrow) { 2846 vscaleforrow = coloring->vscaleforrow; 2847 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2848 2849 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2850 /* 2851 Loop over each color 2852 */ 2853 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2854 for (k=0; k<coloring->ncolors; k++) { 2855 coloring->currentcolor = k; 2856 for (i=0; i<bs; i++) { 2857 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2858 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2859 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2860 /* 2861 Loop over each column associated with color 2862 adding the perturbation to the vector w3. 2863 */ 2864 for (l=0; l<coloring->ncolumns[k]; l++) { 2865 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2866 if (coloring->htype[0] == 'w') { 2867 dx = 1.0 + unorm; 2868 } else { 2869 dx = xx[col]; 2870 } 2871 if (dx == 0.0) dx = 1.0; 2872 #if !defined(PETSC_USE_COMPLEX) 2873 if (dx < umin && dx >= 0.0) dx = umin; 2874 else if (dx < 0.0 && dx > -umin) dx = -umin; 2875 #else 2876 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2877 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2878 #endif 2879 dx *= epsilon; 2880 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2881 w3_array[col] += dx; 2882 } 2883 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2884 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2885 2886 /* 2887 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2888 w2 = F(x1 + dx) - F(x1) 2889 */ 2890 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2891 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2892 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2893 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2894 2895 /* 2896 Loop over rows of vector, putting results into Jacobian matrix 2897 */ 2898 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2899 for (l=0; l<coloring->nrows[k]; l++) { 2900 row = bs*coloring->rows[k][l]; /* local row index */ 2901 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2902 for (j=0; j<bs; j++) { 2903 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2904 srows[j] = row + start + j; 2905 } 2906 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2907 } 2908 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2909 } 2910 } /* endof for each color */ 2911 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2912 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2913 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2914 ierr = PetscFree(srows);CHKERRQ(ierr); 2915 2916 coloring->currentcolor = -1; 2917 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2918 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2919 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2920 PetscFunctionReturn(0); 2921 } 2922 2923 /* -------------------------------------------------------------------*/ 2924 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2925 MatGetRow_SeqBAIJ, 2926 MatRestoreRow_SeqBAIJ, 2927 MatMult_SeqBAIJ_N, 2928 /* 4*/ MatMultAdd_SeqBAIJ_N, 2929 MatMultTranspose_SeqBAIJ, 2930 MatMultTransposeAdd_SeqBAIJ, 2931 0, 2932 0, 2933 0, 2934 /*10*/ 0, 2935 MatLUFactor_SeqBAIJ, 2936 0, 2937 0, 2938 MatTranspose_SeqBAIJ, 2939 /*15*/ MatGetInfo_SeqBAIJ, 2940 MatEqual_SeqBAIJ, 2941 MatGetDiagonal_SeqBAIJ, 2942 MatDiagonalScale_SeqBAIJ, 2943 MatNorm_SeqBAIJ, 2944 /*20*/ 0, 2945 MatAssemblyEnd_SeqBAIJ, 2946 MatSetOption_SeqBAIJ, 2947 MatZeroEntries_SeqBAIJ, 2948 /*24*/ MatZeroRows_SeqBAIJ, 2949 0, 2950 0, 2951 0, 2952 0, 2953 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2954 0, 2955 0, 2956 MatGetArray_SeqBAIJ, 2957 MatRestoreArray_SeqBAIJ, 2958 /*34*/ MatDuplicate_SeqBAIJ, 2959 0, 2960 0, 2961 MatILUFactor_SeqBAIJ, 2962 0, 2963 /*39*/ MatAXPY_SeqBAIJ, 2964 MatGetSubMatrices_SeqBAIJ, 2965 MatIncreaseOverlap_SeqBAIJ, 2966 MatGetValues_SeqBAIJ, 2967 MatCopy_SeqBAIJ, 2968 /*44*/ 0, 2969 MatScale_SeqBAIJ, 2970 0, 2971 0, 2972 MatZeroRowsColumns_SeqBAIJ, 2973 /*49*/ MatSetBlockSize_SeqBAIJ, 2974 MatGetRowIJ_SeqBAIJ, 2975 MatRestoreRowIJ_SeqBAIJ, 2976 MatGetColumnIJ_SeqBAIJ, 2977 MatRestoreColumnIJ_SeqBAIJ, 2978 /*54*/ MatFDColoringCreate_SeqAIJ, 2979 0, 2980 0, 2981 0, 2982 MatSetValuesBlocked_SeqBAIJ, 2983 /*59*/ MatGetSubMatrix_SeqBAIJ, 2984 MatDestroy_SeqBAIJ, 2985 MatView_SeqBAIJ, 2986 0, 2987 0, 2988 /*64*/ 0, 2989 0, 2990 0, 2991 0, 2992 0, 2993 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2994 0, 2995 MatConvert_Basic, 2996 0, 2997 0, 2998 /*74*/ 0, 2999 MatFDColoringApply_BAIJ, 3000 0, 3001 0, 3002 0, 3003 /*79*/ 0, 3004 0, 3005 0, 3006 0, 3007 MatLoad_SeqBAIJ, 3008 /*84*/ 0, 3009 0, 3010 0, 3011 0, 3012 0, 3013 /*89*/ 0, 3014 0, 3015 0, 3016 0, 3017 0, 3018 /*94*/ 0, 3019 0, 3020 0, 3021 0, 3022 0, 3023 /*99*/0, 3024 0, 3025 0, 3026 0, 3027 0, 3028 /*104*/0, 3029 MatRealPart_SeqBAIJ, 3030 MatImaginaryPart_SeqBAIJ, 3031 0, 3032 0, 3033 /*109*/0, 3034 0, 3035 0, 3036 0, 3037 MatMissingDiagonal_SeqBAIJ, 3038 /*114*/0, 3039 0, 3040 0, 3041 0, 3042 0, 3043 /*119*/0, 3044 0, 3045 MatMultHermitianTranspose_SeqBAIJ, 3046 MatMultHermitianTransposeAdd_SeqBAIJ, 3047 0, 3048 }; 3049 3050 EXTERN_C_BEGIN 3051 #undef __FUNCT__ 3052 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3053 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 3054 { 3055 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3056 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3057 PetscErrorCode ierr; 3058 3059 PetscFunctionBegin; 3060 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3061 3062 /* allocate space for values if not already there */ 3063 if (!aij->saved_values) { 3064 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3065 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3066 } 3067 3068 /* copy values over */ 3069 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 EXTERN_C_END 3073 3074 EXTERN_C_BEGIN 3075 #undef __FUNCT__ 3076 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3077 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 3078 { 3079 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3080 PetscErrorCode ierr; 3081 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3082 3083 PetscFunctionBegin; 3084 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3085 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3086 3087 /* copy values over */ 3088 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3089 PetscFunctionReturn(0); 3090 } 3091 EXTERN_C_END 3092 3093 EXTERN_C_BEGIN 3094 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3095 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3096 EXTERN_C_END 3097 3098 EXTERN_C_BEGIN 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3101 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3102 { 3103 Mat_SeqBAIJ *b; 3104 PetscErrorCode ierr; 3105 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3106 PetscBool flg,skipallocation = PETSC_FALSE; 3107 3108 PetscFunctionBegin; 3109 3110 if (nz == MAT_SKIP_ALLOCATION) { 3111 skipallocation = PETSC_TRUE; 3112 nz = 0; 3113 } 3114 3115 if (bs < 0) { 3116 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3117 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3118 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3119 bs = PetscAbs(bs); 3120 } 3121 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3122 bs = newbs; 3123 3124 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3125 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3126 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3127 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3128 3129 B->preallocated = PETSC_TRUE; 3130 3131 mbs = B->rmap->n/bs; 3132 nbs = B->cmap->n/bs; 3133 bs2 = bs*bs; 3134 3135 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3136 3137 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3138 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3139 if (nnz) { 3140 for (i=0; i<mbs; i++) { 3141 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3142 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3143 } 3144 } 3145 3146 b = (Mat_SeqBAIJ*)B->data; 3147 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3148 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3149 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3150 3151 if (!flg) { 3152 switch (bs) { 3153 case 1: 3154 B->ops->mult = MatMult_SeqBAIJ_1; 3155 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3156 B->ops->sor = MatSOR_SeqBAIJ_1; 3157 break; 3158 case 2: 3159 B->ops->mult = MatMult_SeqBAIJ_2; 3160 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3161 B->ops->sor = MatSOR_SeqBAIJ_2; 3162 break; 3163 case 3: 3164 B->ops->mult = MatMult_SeqBAIJ_3; 3165 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3166 B->ops->sor = MatSOR_SeqBAIJ_3; 3167 break; 3168 case 4: 3169 B->ops->mult = MatMult_SeqBAIJ_4; 3170 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3171 B->ops->sor = MatSOR_SeqBAIJ_4; 3172 break; 3173 case 5: 3174 B->ops->mult = MatMult_SeqBAIJ_5; 3175 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3176 B->ops->sor = MatSOR_SeqBAIJ_5; 3177 break; 3178 case 6: 3179 B->ops->mult = MatMult_SeqBAIJ_6; 3180 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3181 B->ops->sor = MatSOR_SeqBAIJ_6; 3182 break; 3183 case 7: 3184 B->ops->mult = MatMult_SeqBAIJ_7; 3185 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3186 B->ops->sor = MatSOR_SeqBAIJ_7; 3187 break; 3188 case 15: 3189 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3190 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3191 B->ops->sor = MatSOR_SeqBAIJ_N; 3192 break; 3193 default: 3194 B->ops->mult = MatMult_SeqBAIJ_N; 3195 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3196 B->ops->sor = MatSOR_SeqBAIJ_N; 3197 break; 3198 } 3199 } 3200 B->rmap->bs = bs; 3201 b->mbs = mbs; 3202 b->nbs = nbs; 3203 if (!skipallocation) { 3204 if (!b->imax) { 3205 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3206 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3207 b->free_imax_ilen = PETSC_TRUE; 3208 } 3209 /* b->ilen will count nonzeros in each block row so far. */ 3210 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3211 if (!nnz) { 3212 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3213 else if (nz < 0) nz = 1; 3214 for (i=0; i<mbs; i++) b->imax[i] = nz; 3215 nz = nz*mbs; 3216 } else { 3217 nz = 0; 3218 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3219 } 3220 3221 /* allocate the matrix space */ 3222 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3223 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3224 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3225 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3226 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3227 b->singlemalloc = PETSC_TRUE; 3228 b->i[0] = 0; 3229 for (i=1; i<mbs+1; i++) { 3230 b->i[i] = b->i[i-1] + b->imax[i-1]; 3231 } 3232 b->free_a = PETSC_TRUE; 3233 b->free_ij = PETSC_TRUE; 3234 } else { 3235 b->free_a = PETSC_FALSE; 3236 b->free_ij = PETSC_FALSE; 3237 } 3238 3239 B->rmap->bs = bs; 3240 b->bs2 = bs2; 3241 b->mbs = mbs; 3242 b->nz = 0; 3243 b->maxnz = nz; 3244 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3245 PetscFunctionReturn(0); 3246 } 3247 EXTERN_C_END 3248 3249 EXTERN_C_BEGIN 3250 #undef __FUNCT__ 3251 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3252 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3253 { 3254 PetscInt i,m,nz,nz_max=0,*nnz; 3255 PetscScalar *values=0; 3256 PetscErrorCode ierr; 3257 3258 PetscFunctionBegin; 3259 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3260 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3261 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3262 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3263 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3264 m = B->rmap->n/bs; 3265 3266 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3267 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3268 for(i=0; i<m; i++) { 3269 nz = ii[i+1]- ii[i]; 3270 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3271 nz_max = PetscMax(nz_max, nz); 3272 nnz[i] = nz; 3273 } 3274 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3275 ierr = PetscFree(nnz);CHKERRQ(ierr); 3276 3277 values = (PetscScalar*)V; 3278 if (!values) { 3279 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3280 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3281 } 3282 for (i=0; i<m; i++) { 3283 PetscInt ncols = ii[i+1] - ii[i]; 3284 const PetscInt *icols = jj + ii[i]; 3285 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3286 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3287 } 3288 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3289 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3290 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3291 3292 PetscFunctionReturn(0); 3293 } 3294 EXTERN_C_END 3295 3296 3297 EXTERN_C_BEGIN 3298 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3299 #if defined(PETSC_HAVE_MUMPS) 3300 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3301 #endif 3302 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3303 EXTERN_C_END 3304 3305 /*MC 3306 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3307 block sparse compressed row format. 3308 3309 Options Database Keys: 3310 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3311 3312 Level: beginner 3313 3314 .seealso: MatCreateSeqBAIJ() 3315 M*/ 3316 3317 3318 EXTERN_C_BEGIN 3319 #undef __FUNCT__ 3320 #define __FUNCT__ "MatCreate_SeqBAIJ" 3321 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3322 { 3323 PetscErrorCode ierr; 3324 PetscMPIInt size; 3325 Mat_SeqBAIJ *b; 3326 3327 PetscFunctionBegin; 3328 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3329 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3330 3331 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3332 B->data = (void*)b; 3333 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3334 B->mapping = 0; 3335 b->row = 0; 3336 b->col = 0; 3337 b->icol = 0; 3338 b->reallocs = 0; 3339 b->saved_values = 0; 3340 3341 b->roworiented = PETSC_TRUE; 3342 b->nonew = 0; 3343 b->diag = 0; 3344 b->solve_work = 0; 3345 b->mult_work = 0; 3346 B->spptr = 0; 3347 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3348 b->keepnonzeropattern = PETSC_FALSE; 3349 b->xtoy = 0; 3350 b->XtoY = 0; 3351 b->compressedrow.use = PETSC_FALSE; 3352 b->compressedrow.nrows = 0; 3353 b->compressedrow.i = PETSC_NULL; 3354 b->compressedrow.rindex = PETSC_NULL; 3355 b->compressedrow.checked = PETSC_FALSE; 3356 B->same_nonzero = PETSC_FALSE; 3357 3358 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3359 "MatGetFactorAvailable_seqbaij_petsc", 3360 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3361 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3362 "MatGetFactor_seqbaij_petsc", 3363 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3364 #if defined(PETSC_HAVE_MUMPS) 3365 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3366 #endif 3367 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3368 "MatInvertBlockDiagonal_SeqBAIJ", 3369 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3370 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3371 "MatStoreValues_SeqBAIJ", 3372 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3374 "MatRetrieveValues_SeqBAIJ", 3375 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3376 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3377 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3378 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3379 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3380 "MatConvert_SeqBAIJ_SeqAIJ", 3381 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3382 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3383 "MatConvert_SeqBAIJ_SeqSBAIJ", 3384 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3385 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3386 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3387 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3388 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3389 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3390 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3391 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3392 PetscFunctionReturn(0); 3393 } 3394 EXTERN_C_END 3395 3396 #undef __FUNCT__ 3397 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3398 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3399 { 3400 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3401 PetscErrorCode ierr; 3402 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3403 3404 PetscFunctionBegin; 3405 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3406 3407 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3408 c->imax = a->imax; 3409 c->ilen = a->ilen; 3410 c->free_imax_ilen = PETSC_FALSE; 3411 } else { 3412 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3413 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3414 for (i=0; i<mbs; i++) { 3415 c->imax[i] = a->imax[i]; 3416 c->ilen[i] = a->ilen[i]; 3417 } 3418 c->free_imax_ilen = PETSC_TRUE; 3419 } 3420 3421 /* allocate the matrix space */ 3422 if (mallocmatspace){ 3423 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3424 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3425 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3426 c->singlemalloc = PETSC_FALSE; 3427 c->free_ij = PETSC_FALSE; 3428 c->i = a->i; 3429 c->j = a->j; 3430 c->parent = A; 3431 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3432 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3433 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3434 } else { 3435 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3436 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3437 c->singlemalloc = PETSC_TRUE; 3438 c->free_ij = PETSC_TRUE; 3439 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3440 if (mbs > 0) { 3441 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3442 if (cpvalues == MAT_COPY_VALUES) { 3443 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3444 } else { 3445 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3446 } 3447 } 3448 } 3449 } 3450 3451 c->roworiented = a->roworiented; 3452 c->nonew = a->nonew; 3453 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3454 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3455 c->bs2 = a->bs2; 3456 c->mbs = a->mbs; 3457 c->nbs = a->nbs; 3458 3459 if (a->diag) { 3460 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3461 c->diag = a->diag; 3462 c->free_diag = PETSC_FALSE; 3463 } else { 3464 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3465 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3466 for (i=0; i<mbs; i++) { 3467 c->diag[i] = a->diag[i]; 3468 } 3469 c->free_diag = PETSC_TRUE; 3470 } 3471 } else c->diag = 0; 3472 c->nz = a->nz; 3473 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3474 c->solve_work = 0; 3475 c->mult_work = 0; 3476 c->free_a = PETSC_TRUE; 3477 c->free_ij = PETSC_TRUE; 3478 C->preallocated = PETSC_TRUE; 3479 C->assembled = PETSC_TRUE; 3480 3481 c->compressedrow.use = a->compressedrow.use; 3482 c->compressedrow.nrows = a->compressedrow.nrows; 3483 c->compressedrow.checked = a->compressedrow.checked; 3484 if (a->compressedrow.checked && a->compressedrow.use){ 3485 i = a->compressedrow.nrows; 3486 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3487 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3488 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3489 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3490 } else { 3491 c->compressedrow.use = PETSC_FALSE; 3492 c->compressedrow.i = PETSC_NULL; 3493 c->compressedrow.rindex = PETSC_NULL; 3494 } 3495 C->same_nonzero = A->same_nonzero; 3496 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3497 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3498 PetscFunctionReturn(0); 3499 } 3500 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3503 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3504 { 3505 PetscErrorCode ierr; 3506 3507 PetscFunctionBegin; 3508 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3509 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3510 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3511 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3512 PetscFunctionReturn(0); 3513 } 3514 3515 #undef __FUNCT__ 3516 #define __FUNCT__ "MatLoad_SeqBAIJ" 3517 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3518 { 3519 Mat_SeqBAIJ *a; 3520 PetscErrorCode ierr; 3521 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3522 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3523 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3524 PetscInt *masked,nmask,tmp,bs2,ishift; 3525 PetscMPIInt size; 3526 int fd; 3527 PetscScalar *aa; 3528 MPI_Comm comm = ((PetscObject)viewer)->comm; 3529 3530 PetscFunctionBegin; 3531 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3532 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3533 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3534 bs2 = bs*bs; 3535 3536 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3537 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3538 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3539 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3540 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3541 M = header[1]; N = header[2]; nz = header[3]; 3542 3543 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3544 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3545 3546 /* 3547 This code adds extra rows to make sure the number of rows is 3548 divisible by the blocksize 3549 */ 3550 mbs = M/bs; 3551 extra_rows = bs - M + bs*(mbs); 3552 if (extra_rows == bs) extra_rows = 0; 3553 else mbs++; 3554 if (extra_rows) { 3555 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3556 } 3557 3558 /* Set global sizes if not already set */ 3559 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3560 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3561 } else { /* Check if the matrix global sizes are correct */ 3562 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3563 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3564 } 3565 3566 /* read in row lengths */ 3567 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3568 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3569 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3570 3571 /* read in column indices */ 3572 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3573 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3574 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3575 3576 /* loop over row lengths determining block row lengths */ 3577 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3578 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3579 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3580 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3581 rowcount = 0; 3582 nzcount = 0; 3583 for (i=0; i<mbs; i++) { 3584 nmask = 0; 3585 for (j=0; j<bs; j++) { 3586 kmax = rowlengths[rowcount]; 3587 for (k=0; k<kmax; k++) { 3588 tmp = jj[nzcount++]/bs; 3589 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3590 } 3591 rowcount++; 3592 } 3593 browlengths[i] += nmask; 3594 /* zero out the mask elements we set */ 3595 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3596 } 3597 3598 /* Do preallocation */ 3599 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3600 a = (Mat_SeqBAIJ*)newmat->data; 3601 3602 /* set matrix "i" values */ 3603 a->i[0] = 0; 3604 for (i=1; i<= mbs; i++) { 3605 a->i[i] = a->i[i-1] + browlengths[i-1]; 3606 a->ilen[i-1] = browlengths[i-1]; 3607 } 3608 a->nz = 0; 3609 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3610 3611 /* read in nonzero values */ 3612 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3613 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3614 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3615 3616 /* set "a" and "j" values into matrix */ 3617 nzcount = 0; jcount = 0; 3618 for (i=0; i<mbs; i++) { 3619 nzcountb = nzcount; 3620 nmask = 0; 3621 for (j=0; j<bs; j++) { 3622 kmax = rowlengths[i*bs+j]; 3623 for (k=0; k<kmax; k++) { 3624 tmp = jj[nzcount++]/bs; 3625 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3626 } 3627 } 3628 /* sort the masked values */ 3629 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3630 3631 /* set "j" values into matrix */ 3632 maskcount = 1; 3633 for (j=0; j<nmask; j++) { 3634 a->j[jcount++] = masked[j]; 3635 mask[masked[j]] = maskcount++; 3636 } 3637 /* set "a" values into matrix */ 3638 ishift = bs2*a->i[i]; 3639 for (j=0; j<bs; j++) { 3640 kmax = rowlengths[i*bs+j]; 3641 for (k=0; k<kmax; k++) { 3642 tmp = jj[nzcountb]/bs ; 3643 block = mask[tmp] - 1; 3644 point = jj[nzcountb] - bs*tmp; 3645 idx = ishift + bs2*block + j + bs*point; 3646 a->a[idx] = (MatScalar)aa[nzcountb++]; 3647 } 3648 } 3649 /* zero out the mask elements we set */ 3650 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3651 } 3652 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3653 3654 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3655 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3656 ierr = PetscFree(aa);CHKERRQ(ierr); 3657 ierr = PetscFree(jj);CHKERRQ(ierr); 3658 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3659 3660 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3661 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3662 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3663 PetscFunctionReturn(0); 3664 } 3665 3666 #undef __FUNCT__ 3667 #define __FUNCT__ "MatCreateSeqBAIJ" 3668 /*@C 3669 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3670 compressed row) format. For good matrix assembly performance the 3671 user should preallocate the matrix storage by setting the parameter nz 3672 (or the array nnz). By setting these parameters accurately, performance 3673 during matrix assembly can be increased by more than a factor of 50. 3674 3675 Collective on MPI_Comm 3676 3677 Input Parameters: 3678 + comm - MPI communicator, set to PETSC_COMM_SELF 3679 . bs - size of block 3680 . m - number of rows 3681 . n - number of columns 3682 . nz - number of nonzero blocks per block row (same for all rows) 3683 - nnz - array containing the number of nonzero blocks in the various block rows 3684 (possibly different for each block row) or PETSC_NULL 3685 3686 Output Parameter: 3687 . A - the matrix 3688 3689 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3690 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3691 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3692 3693 Options Database Keys: 3694 . -mat_no_unroll - uses code that does not unroll the loops in the 3695 block calculations (much slower) 3696 . -mat_block_size - size of the blocks to use 3697 3698 Level: intermediate 3699 3700 Notes: 3701 The number of rows and columns must be divisible by blocksize. 3702 3703 If the nnz parameter is given then the nz parameter is ignored 3704 3705 A nonzero block is any block that as 1 or more nonzeros in it 3706 3707 The block AIJ format is fully compatible with standard Fortran 77 3708 storage. That is, the stored row and column indices can begin at 3709 either one (as in Fortran) or zero. See the users' manual for details. 3710 3711 Specify the preallocated storage with either nz or nnz (not both). 3712 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3713 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3714 matrices. 3715 3716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3717 @*/ 3718 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3719 { 3720 PetscErrorCode ierr; 3721 3722 PetscFunctionBegin; 3723 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3724 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3725 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3726 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3727 PetscFunctionReturn(0); 3728 } 3729 3730 #undef __FUNCT__ 3731 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3732 /*@C 3733 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3734 per row in the matrix. For good matrix assembly performance the 3735 user should preallocate the matrix storage by setting the parameter nz 3736 (or the array nnz). By setting these parameters accurately, performance 3737 during matrix assembly can be increased by more than a factor of 50. 3738 3739 Collective on MPI_Comm 3740 3741 Input Parameters: 3742 + A - the matrix 3743 . bs - size of block 3744 . nz - number of block nonzeros per block row (same for all rows) 3745 - nnz - array containing the number of block nonzeros in the various block rows 3746 (possibly different for each block row) or PETSC_NULL 3747 3748 Options Database Keys: 3749 . -mat_no_unroll - uses code that does not unroll the loops in the 3750 block calculations (much slower) 3751 . -mat_block_size - size of the blocks to use 3752 3753 Level: intermediate 3754 3755 Notes: 3756 If the nnz parameter is given then the nz parameter is ignored 3757 3758 You can call MatGetInfo() to get information on how effective the preallocation was; 3759 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3760 You can also run with the option -info and look for messages with the string 3761 malloc in them to see if additional memory allocation was needed. 3762 3763 The block AIJ format is fully compatible with standard Fortran 77 3764 storage. That is, the stored row and column indices can begin at 3765 either one (as in Fortran) or zero. See the users' manual for details. 3766 3767 Specify the preallocated storage with either nz or nnz (not both). 3768 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3769 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3770 3771 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3772 @*/ 3773 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3774 { 3775 PetscErrorCode ierr; 3776 3777 PetscFunctionBegin; 3778 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3779 PetscFunctionReturn(0); 3780 } 3781 3782 #undef __FUNCT__ 3783 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3784 /*@C 3785 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3786 (the default sequential PETSc format). 3787 3788 Collective on MPI_Comm 3789 3790 Input Parameters: 3791 + A - the matrix 3792 . i - the indices into j for the start of each local row (starts with zero) 3793 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3794 - v - optional values in the matrix 3795 3796 Level: developer 3797 3798 .keywords: matrix, aij, compressed row, sparse 3799 3800 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3801 @*/ 3802 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3803 { 3804 PetscErrorCode ierr; 3805 3806 PetscFunctionBegin; 3807 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3808 PetscFunctionReturn(0); 3809 } 3810 3811 3812 #undef __FUNCT__ 3813 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3814 /*@ 3815 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3816 3817 Collective on MPI_Comm 3818 3819 Input Parameters: 3820 + comm - must be an MPI communicator of size 1 3821 . bs - size of block 3822 . m - number of rows 3823 . n - number of columns 3824 . i - row indices 3825 . j - column indices 3826 - a - matrix values 3827 3828 Output Parameter: 3829 . mat - the matrix 3830 3831 Level: advanced 3832 3833 Notes: 3834 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3835 once the matrix is destroyed 3836 3837 You cannot set new nonzero locations into this matrix, that will generate an error. 3838 3839 The i and j indices are 0 based 3840 3841 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3842 3843 3844 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3845 3846 @*/ 3847 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3848 { 3849 PetscErrorCode ierr; 3850 PetscInt ii; 3851 Mat_SeqBAIJ *baij; 3852 3853 PetscFunctionBegin; 3854 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3855 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3856 3857 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3858 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3859 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3860 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3861 baij = (Mat_SeqBAIJ*)(*mat)->data; 3862 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3863 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3864 3865 baij->i = i; 3866 baij->j = j; 3867 baij->a = a; 3868 baij->singlemalloc = PETSC_FALSE; 3869 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3870 baij->free_a = PETSC_FALSE; 3871 baij->free_ij = PETSC_FALSE; 3872 3873 for (ii=0; ii<m; ii++) { 3874 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3875 #if defined(PETSC_USE_DEBUG) 3876 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3877 #endif 3878 } 3879 #if defined(PETSC_USE_DEBUG) 3880 for (ii=0; ii<baij->i[m]; ii++) { 3881 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3882 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3883 } 3884 #endif 3885 3886 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3888 PetscFunctionReturn(0); 3889 } 3890