1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "../src/mat/blockinvert.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 12 /*@ 13 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 14 15 Collective on Mat 16 17 Input Parameters: 18 . mat - the matrix 19 20 Level: advanced 21 @*/ 22 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 23 { 24 PetscErrorCode ierr,(*f)(Mat); 25 26 PetscFunctionBegin; 27 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 28 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 29 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 30 31 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 32 if (f) { 33 ierr = (*f)(mat);CHKERRQ(ierr); 34 } else { 35 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 36 } 37 PetscFunctionReturn(0); 38 } 39 40 EXTERN_C_BEGIN 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 43 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 44 { 45 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 46 PetscErrorCode ierr; 47 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5]; 48 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25]; 49 PetscReal shift = 0.0; 50 51 PetscFunctionBegin; 52 if (a->idiagvalid) PetscFunctionReturn(0); 53 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 54 diag_offset = a->diag; 55 if (!a->idiag) { 56 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 57 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 58 } 59 diag = a->idiag; 60 mdiag = a->idiag+bs*bs*mbs; 61 /* factor and invert each block */ 62 switch (bs){ 63 case 1: 64 for (i=0; i<mbs; i++) { 65 odiag = v + 1*diag_offset[i]; 66 diag[0] = odiag[0]; 67 mdiag[0] = odiag[0]; 68 diag[0] = 1.0 / (diag[0] + shift); 69 diag += 1; 70 mdiag += 1; 71 } 72 break; 73 case 2: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 4*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 78 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 79 diag += 4; 80 mdiag += 4; 81 } 82 break; 83 case 3: 84 for (i=0; i<mbs; i++) { 85 odiag = v + 9*diag_offset[i]; 86 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 87 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 88 diag[8] = odiag[8]; 89 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 90 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 91 mdiag[8] = odiag[8]; 92 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 93 diag += 9; 94 mdiag += 9; 95 } 96 break; 97 case 4: 98 for (i=0; i<mbs; i++) { 99 odiag = v + 16*diag_offset[i]; 100 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 101 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 103 diag += 16; 104 mdiag += 16; 105 } 106 break; 107 case 5: 108 for (i=0; i<mbs; i++) { 109 odiag = v + 25*diag_offset[i]; 110 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 111 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 113 diag += 25; 114 mdiag += 25; 115 } 116 break; 117 case 6: 118 for (i=0; i<mbs; i++) { 119 odiag = v + 36*diag_offset[i]; 120 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 121 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 123 diag += 36; 124 mdiag += 36; 125 } 126 break; 127 default: 128 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 129 } 130 a->idiagvalid = PETSC_TRUE; 131 PetscFunctionReturn(0); 132 } 133 EXTERN_C_END 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 137 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 138 { 139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 140 PetscScalar *x,x1,s1; 141 const PetscScalar *b; 142 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,j; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 its = its*lits; 150 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 155 156 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 157 158 diag = a->diag; 159 idiag = a->idiag; 160 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 161 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 162 163 if (flag & SOR_ZERO_INITIAL_GUESS) { 164 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 165 x[0] = b[0]*idiag[0]; 166 i2 = 1; 167 idiag += 1; 168 for (i=1; i<m; i++) { 169 v = aa + ai[i]; 170 vi = aj + ai[i]; 171 nz = diag[i] - ai[i]; 172 s1 = b[i2]; 173 for (j=0; j<nz; j++) { 174 s1 -= v[j]*x[vi[j]]; 175 } 176 x[i2] = idiag[0]*s1; 177 idiag += 1; 178 i2 += 1; 179 } 180 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 181 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 182 } 183 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 184 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 185 i2 = 0; 186 mdiag = a->idiag+a->mbs; 187 for (i=0; i<m; i++) { 188 x1 = x[i2]; 189 x[i2] = mdiag[0]*x1; 190 mdiag += 1; 191 i2 += 1; 192 } 193 ierr = PetscLogFlops(m);CHKERRQ(ierr); 194 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 195 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 196 } 197 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 198 idiag = a->idiag+a->mbs - 1; 199 i2 = m - 1; 200 x1 = x[i2]; 201 x[i2] = idiag[0]*x1; 202 idiag -= 1; 203 i2 -= 1; 204 for (i=m-2; i>=0; i--) { 205 v = aa + (diag[i]+1); 206 vi = aj + diag[i] + 1; 207 nz = ai[i+1] - diag[i] - 1; 208 s1 = x[i2]; 209 for (j=0; j<nz; j++) { 210 s1 -= v[j]*x[vi[j]]; 211 } 212 x[i2] = idiag[0]*s1; 213 idiag -= 1; 214 i2 -= 1; 215 } 216 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 217 } 218 } else { 219 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 220 } 221 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 222 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 228 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 229 { 230 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 231 PetscScalar *x,x1,x2,s1,s2; 232 const PetscScalar *b; 233 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 234 PetscErrorCode ierr; 235 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 236 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 237 238 PetscFunctionBegin; 239 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 240 its = its*lits; 241 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 242 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 243 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 244 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 245 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 246 247 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 248 249 diag = a->diag; 250 idiag = a->idiag; 251 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 252 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 253 254 if (flag & SOR_ZERO_INITIAL_GUESS) { 255 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 256 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 257 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 258 i2 = 2; 259 idiag += 4; 260 for (i=1; i<m; i++) { 261 v = aa + 4*ai[i]; 262 vi = aj + ai[i]; 263 nz = diag[i] - ai[i]; 264 s1 = b[i2]; s2 = b[i2+1]; 265 for (j=0; j<nz; j++) { 266 idx = 2*vi[j]; 267 it = 4*j; 268 x1 = x[idx]; x2 = x[1+idx]; 269 s1 -= v[it]*x1 + v[it+2]*x2; 270 s2 -= v[it+1]*x1 + v[it+3]*x2; 271 } 272 x[i2] = idiag[0]*s1 + idiag[2]*s2; 273 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 274 idiag += 4; 275 i2 += 2; 276 } 277 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 278 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 279 } 280 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 281 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 282 i2 = 0; 283 mdiag = a->idiag+4*a->mbs; 284 for (i=0; i<m; i++) { 285 x1 = x[i2]; x2 = x[i2+1]; 286 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 287 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 288 mdiag += 4; 289 i2 += 2; 290 } 291 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 292 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 293 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 294 } 295 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 296 idiag = a->idiag+4*a->mbs - 4; 297 i2 = 2*m - 2; 298 x1 = x[i2]; x2 = x[i2+1]; 299 x[i2] = idiag[0]*x1 + idiag[2]*x2; 300 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 301 idiag -= 4; 302 i2 -= 2; 303 for (i=m-2; i>=0; i--) { 304 v = aa + 4*(diag[i]+1); 305 vi = aj + diag[i] + 1; 306 nz = ai[i+1] - diag[i] - 1; 307 s1 = x[i2]; s2 = x[i2+1]; 308 for (j=0; j<nz; j++) { 309 idx = 2*vi[j]; 310 it = 4*j; 311 x1 = x[idx]; x2 = x[1+idx]; 312 s1 -= v[it]*x1 + v[it+2]*x2; 313 s2 -= v[it+1]*x1 + v[it+3]*x2; 314 } 315 x[i2] = idiag[0]*s1 + idiag[2]*s2; 316 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 317 idiag -= 4; 318 i2 -= 2; 319 } 320 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 321 } 322 } else { 323 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 324 } 325 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 326 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 332 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 333 { 334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 335 PetscScalar *x,x1,x2,x3,s1,s2,s3; 336 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 337 const PetscScalar *b; 338 PetscErrorCode ierr; 339 PetscInt m = a->mbs,i,i2,nz,idx; 340 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 341 342 PetscFunctionBegin; 343 its = its*lits; 344 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 345 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 346 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 347 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 348 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 349 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 350 351 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 352 353 diag = a->diag; 354 idiag = a->idiag; 355 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 356 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 357 358 if (flag & SOR_ZERO_INITIAL_GUESS) { 359 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 360 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 361 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 362 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 363 i2 = 3; 364 idiag += 9; 365 for (i=1; i<m; i++) { 366 v = aa + 9*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 370 while (nz--) { 371 idx = 3*(*vi++); 372 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 373 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 374 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 375 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 376 v += 9; 377 } 378 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 379 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 380 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 381 idiag += 9; 382 i2 += 3; 383 } 384 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 385 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 386 } 387 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 388 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 389 i2 = 0; 390 mdiag = a->idiag+9*a->mbs; 391 for (i=0; i<m; i++) { 392 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 393 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 394 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 395 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 396 mdiag += 9; 397 i2 += 3; 398 } 399 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+9*a->mbs - 9; 405 i2 = 3*m - 3; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 407 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 408 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 409 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 410 idiag -= 9; 411 i2 -= 3; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 9*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 417 while (nz--) { 418 idx = 3*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 420 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 421 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 422 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 423 v += 9; 424 } 425 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 426 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 427 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 428 idiag -= 9; 429 i2 -= 3; 430 } 431 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 432 } 433 } else { 434 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 435 } 436 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 437 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 443 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 444 { 445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 446 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 447 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 448 const PetscScalar *b; 449 PetscErrorCode ierr; 450 PetscInt m = a->mbs,i,i2,nz,idx; 451 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 452 453 PetscFunctionBegin; 454 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 475 i2 = 4; 476 idiag += 16; 477 for (i=1; i<m; i++) { 478 v = aa + 16*ai[i]; 479 vi = aj + ai[i]; 480 nz = diag[i] - ai[i]; 481 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 482 while (nz--) { 483 idx = 4*(*vi++); 484 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 485 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 486 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 487 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 488 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 489 v += 16; 490 } 491 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 492 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 493 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 494 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 495 idiag += 16; 496 i2 += 4; 497 } 498 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 499 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 500 } 501 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 502 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 503 i2 = 0; 504 mdiag = a->idiag+16*a->mbs; 505 for (i=0; i<m; i++) { 506 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 507 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 508 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 509 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 510 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 511 mdiag += 16; 512 i2 += 4; 513 } 514 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 515 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 516 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 517 } 518 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 519 idiag = a->idiag+16*a->mbs - 16; 520 i2 = 4*m - 4; 521 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 522 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 523 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 524 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 525 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 526 idiag -= 16; 527 i2 -= 4; 528 for (i=m-2; i>=0; i--) { 529 v = aa + 16*(diag[i]+1); 530 vi = aj + diag[i] + 1; 531 nz = ai[i+1] - diag[i] - 1; 532 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 533 while (nz--) { 534 idx = 4*(*vi++); 535 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 536 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 537 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 538 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 539 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 540 v += 16; 541 } 542 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 543 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 544 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 545 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 546 idiag -= 16; 547 i2 -= 4; 548 } 549 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 550 } 551 } else { 552 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 553 } 554 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 555 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 561 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 562 { 563 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 564 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 565 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 566 const PetscScalar *b; 567 PetscErrorCode ierr; 568 PetscInt m = a->mbs,i,i2,nz,idx; 569 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 570 571 PetscFunctionBegin; 572 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 573 its = its*lits; 574 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 575 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 576 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 577 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 578 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 579 580 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 581 582 diag = a->diag; 583 idiag = a->idiag; 584 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 585 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 586 587 if (flag & SOR_ZERO_INITIAL_GUESS) { 588 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 589 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 590 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 591 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 592 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 593 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 594 i2 = 5; 595 idiag += 25; 596 for (i=1; i<m; i++) { 597 v = aa + 25*ai[i]; 598 vi = aj + ai[i]; 599 nz = diag[i] - ai[i]; 600 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 601 while (nz--) { 602 idx = 5*(*vi++); 603 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 604 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 605 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 606 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 607 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 608 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 609 v += 25; 610 } 611 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 612 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 613 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 614 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 615 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 616 idiag += 25; 617 i2 += 5; 618 } 619 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 620 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 621 } 622 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 623 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 624 i2 = 0; 625 mdiag = a->idiag+25*a->mbs; 626 for (i=0; i<m; i++) { 627 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 628 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 629 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 630 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 631 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 632 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 633 mdiag += 25; 634 i2 += 5; 635 } 636 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 637 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 638 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 639 } 640 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 641 idiag = a->idiag+25*a->mbs - 25; 642 i2 = 5*m - 5; 643 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 644 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 645 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 646 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 647 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 648 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 649 idiag -= 25; 650 i2 -= 5; 651 for (i=m-2; i>=0; i--) { 652 v = aa + 25*(diag[i]+1); 653 vi = aj + diag[i] + 1; 654 nz = ai[i+1] - diag[i] - 1; 655 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 656 while (nz--) { 657 idx = 5*(*vi++); 658 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 659 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 660 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 661 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 662 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 663 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 664 v += 25; 665 } 666 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 667 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 668 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 669 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 670 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 671 idiag -= 25; 672 i2 -= 5; 673 } 674 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 675 } 676 } else { 677 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 678 } 679 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 680 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 686 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 687 { 688 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 689 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 690 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 691 const PetscScalar *b; 692 PetscErrorCode ierr; 693 PetscInt m = a->mbs,i,i2,nz,idx; 694 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 695 696 PetscFunctionBegin; 697 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 698 its = its*lits; 699 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 700 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 701 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 702 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 703 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 704 705 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 706 707 diag = a->diag; 708 idiag = a->idiag; 709 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 710 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 711 712 if (flag & SOR_ZERO_INITIAL_GUESS) { 713 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 714 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 715 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 716 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 717 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 718 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 719 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 720 i2 = 6; 721 idiag += 36; 722 for (i=1; i<m; i++) { 723 v = aa + 36*ai[i]; 724 vi = aj + ai[i]; 725 nz = diag[i] - ai[i]; 726 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 727 while (nz--) { 728 idx = 6*(*vi++); 729 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 730 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 731 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 732 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 733 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 734 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 735 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 736 v += 36; 737 } 738 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 739 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 740 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 741 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 742 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 743 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 744 idiag += 36; 745 i2 += 6; 746 } 747 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 748 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 749 } 750 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 751 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 752 i2 = 0; 753 mdiag = a->idiag+36*a->mbs; 754 for (i=0; i<m; i++) { 755 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 756 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 757 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 758 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 759 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 760 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 761 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 762 mdiag += 36; 763 i2 += 6; 764 } 765 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 766 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 767 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 768 } 769 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 770 idiag = a->idiag+36*a->mbs - 36; 771 i2 = 6*m - 6; 772 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 773 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 774 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 775 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 776 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 777 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 778 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 779 idiag -= 36; 780 i2 -= 6; 781 for (i=m-2; i>=0; i--) { 782 v = aa + 36*(diag[i]+1); 783 vi = aj + diag[i] + 1; 784 nz = ai[i+1] - diag[i] - 1; 785 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 786 while (nz--) { 787 idx = 6*(*vi++); 788 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 789 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 790 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 791 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 792 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 793 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 794 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 795 v += 36; 796 } 797 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 798 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 799 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 800 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 801 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 802 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 803 idiag -= 36; 804 i2 -= 6; 805 } 806 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 807 } 808 } else { 809 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 810 } 811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 812 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNCT__ 817 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 818 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 819 { 820 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 821 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 822 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 823 const PetscScalar *b; 824 PetscErrorCode ierr; 825 PetscInt m = a->mbs,i,i2,nz,idx; 826 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 827 828 PetscFunctionBegin; 829 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 830 its = its*lits; 831 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 832 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 833 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 834 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 835 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 836 837 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 838 839 diag = a->diag; 840 idiag = a->idiag; 841 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 842 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 843 844 if (flag & SOR_ZERO_INITIAL_GUESS) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 846 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 847 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 848 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 849 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 850 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 851 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 852 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 853 i2 = 7; 854 idiag += 49; 855 for (i=1; i<m; i++) { 856 v = aa + 49*ai[i]; 857 vi = aj + ai[i]; 858 nz = diag[i] - ai[i]; 859 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 860 while (nz--) { 861 idx = 7*(*vi++); 862 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 863 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 864 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 865 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 866 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 867 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 868 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 869 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 870 v += 49; 871 } 872 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 873 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 874 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 875 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 876 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 877 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 878 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 879 idiag += 49; 880 i2 += 7; 881 } 882 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 883 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 884 } 885 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 886 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 887 i2 = 0; 888 mdiag = a->idiag+49*a->mbs; 889 for (i=0; i<m; i++) { 890 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 891 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 892 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 893 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 894 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 895 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 896 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 897 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 898 mdiag += 36; 899 i2 += 6; 900 } 901 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 902 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 903 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 904 } 905 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 906 idiag = a->idiag+49*a->mbs - 49; 907 i2 = 7*m - 7; 908 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 909 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 910 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 911 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 912 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 913 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 914 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 915 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 916 idiag -= 49; 917 i2 -= 7; 918 for (i=m-2; i>=0; i--) { 919 v = aa + 49*(diag[i]+1); 920 vi = aj + diag[i] + 1; 921 nz = ai[i+1] - diag[i] - 1; 922 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 923 while (nz--) { 924 idx = 7*(*vi++); 925 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 926 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 927 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 928 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 929 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 930 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 931 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 932 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 933 v += 49; 934 } 935 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 936 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 937 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 938 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 939 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 940 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 941 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 942 idiag -= 49; 943 i2 -= 7; 944 } 945 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 946 } 947 } else { 948 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 949 } 950 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 951 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 Special version for direct calls from Fortran (Used in PETSc-fun3d) 957 */ 958 #if defined(PETSC_HAVE_FORTRAN_CAPS) 959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 961 #define matsetvaluesblocked4_ matsetvaluesblocked4 962 #endif 963 964 EXTERN_C_BEGIN 965 #undef __FUNCT__ 966 #define __FUNCT__ "matsetvaluesblocked4_" 967 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 968 { 969 Mat A = *AA; 970 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 971 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 972 PetscInt *ai=a->i,*ailen=a->ilen; 973 PetscInt *aj=a->j,stepval,lastcol = -1; 974 const PetscScalar *value = v; 975 MatScalar *ap,*aa = a->a,*bap; 976 977 PetscFunctionBegin; 978 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 979 stepval = (n-1)*4; 980 for (k=0; k<m; k++) { /* loop over added rows */ 981 row = im[k]; 982 rp = aj + ai[row]; 983 ap = aa + 16*ai[row]; 984 nrow = ailen[row]; 985 low = 0; 986 high = nrow; 987 for (l=0; l<n; l++) { /* loop over added columns */ 988 col = in[l]; 989 if (col <= lastcol) low = 0; else high = nrow; 990 lastcol = col; 991 value = v + k*(stepval+4 + l)*4; 992 while (high-low > 7) { 993 t = (low+high)/2; 994 if (rp[t] > col) high = t; 995 else low = t; 996 } 997 for (i=low; i<high; i++) { 998 if (rp[i] > col) break; 999 if (rp[i] == col) { 1000 bap = ap + 16*i; 1001 for (ii=0; ii<4; ii++,value+=stepval) { 1002 for (jj=ii; jj<16; jj+=4) { 1003 bap[jj] += *value++; 1004 } 1005 } 1006 goto noinsert2; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column index thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N >= i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = col; 1020 bap = ap + 16*i; 1021 for (ii=0; ii<4; ii++,value+=stepval) { 1022 for (jj=ii; jj<16; jj+=4) { 1023 bap[jj] = *value++; 1024 } 1025 } 1026 noinsert2:; 1027 low = i; 1028 } 1029 ailen[row] = nrow; 1030 } 1031 PetscFunctionReturnVoid(); 1032 } 1033 EXTERN_C_END 1034 1035 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1036 #define matsetvalues4_ MATSETVALUES4 1037 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1038 #define matsetvalues4_ matsetvalues4 1039 #endif 1040 1041 EXTERN_C_BEGIN 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatSetValues4_" 1044 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1045 { 1046 Mat A = *AA; 1047 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1048 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1049 PetscInt *ai=a->i,*ailen=a->ilen; 1050 PetscInt *aj=a->j,brow,bcol; 1051 PetscInt ridx,cidx,lastcol = -1; 1052 MatScalar *ap,value,*aa=a->a,*bap; 1053 1054 PetscFunctionBegin; 1055 for (k=0; k<m; k++) { /* loop over added rows */ 1056 row = im[k]; brow = row/4; 1057 rp = aj + ai[brow]; 1058 ap = aa + 16*ai[brow]; 1059 nrow = ailen[brow]; 1060 low = 0; 1061 high = nrow; 1062 for (l=0; l<n; l++) { /* loop over added columns */ 1063 col = in[l]; bcol = col/4; 1064 ridx = row % 4; cidx = col % 4; 1065 value = v[l + k*n]; 1066 if (col <= lastcol) low = 0; else high = nrow; 1067 lastcol = col; 1068 while (high-low > 7) { 1069 t = (low+high)/2; 1070 if (rp[t] > bcol) high = t; 1071 else low = t; 1072 } 1073 for (i=low; i<high; i++) { 1074 if (rp[i] > bcol) break; 1075 if (rp[i] == bcol) { 1076 bap = ap + 16*i + 4*cidx + ridx; 1077 *bap += value; 1078 goto noinsert1; 1079 } 1080 } 1081 N = nrow++ - 1; 1082 high++; /* added new column thus must search to one higher than before */ 1083 /* shift up all the later entries in this row */ 1084 for (ii=N; ii>=i; ii--) { 1085 rp[ii+1] = rp[ii]; 1086 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1087 } 1088 if (N>=i) { 1089 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1090 } 1091 rp[i] = bcol; 1092 ap[16*i + 4*cidx + ridx] = value; 1093 noinsert1:; 1094 low = i; 1095 } 1096 ailen[brow] = nrow; 1097 } 1098 PetscFunctionReturnVoid(); 1099 } 1100 EXTERN_C_END 1101 1102 /* 1103 Checks for missing diagonals 1104 */ 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1107 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1108 { 1109 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 PetscInt *diag,*jj = a->j,i; 1112 1113 PetscFunctionBegin; 1114 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1115 *missing = PETSC_FALSE; 1116 if (A->rmap->n > 0 && !jj) { 1117 *missing = PETSC_TRUE; 1118 if (d) *d = 0; 1119 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1120 } else { 1121 diag = a->diag; 1122 for (i=0; i<a->mbs; i++) { 1123 if (jj[diag[i]] != i) { 1124 *missing = PETSC_TRUE; 1125 if (d) *d = i; 1126 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1127 } 1128 } 1129 } 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1135 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1136 { 1137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1138 PetscErrorCode ierr; 1139 PetscInt i,j,m = a->mbs; 1140 1141 PetscFunctionBegin; 1142 if (!a->diag) { 1143 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1144 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1145 a->free_diag = PETSC_TRUE; 1146 } 1147 for (i=0; i<m; i++) { 1148 a->diag[i] = a->i[i+1]; 1149 for (j=a->i[i]; j<a->i[i+1]; j++) { 1150 if (a->j[j] == i) { 1151 a->diag[i] = j; 1152 break; 1153 } 1154 } 1155 } 1156 PetscFunctionReturn(0); 1157 } 1158 1159 1160 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1164 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1165 { 1166 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1167 PetscErrorCode ierr; 1168 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1169 PetscInt *tia, *tja; 1170 1171 PetscFunctionBegin; 1172 *nn = n; 1173 if (!ia) PetscFunctionReturn(0); 1174 if (symmetric) { 1175 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1176 nz = tia[n]; 1177 } else { 1178 tia = a->i; tja = a->j; 1179 } 1180 1181 if (!blockcompressed && bs > 1) { 1182 (*nn) *= bs; 1183 nbs = bs; 1184 /* malloc & create the natural set of indices */ 1185 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1186 if (n) { 1187 (*ia)[0] = 0; 1188 for (j=1; j<bs; j++) { 1189 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1190 } 1191 } 1192 1193 for (i=1; i<n; i++) { 1194 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1195 for (j=1; j<bs; j++) { 1196 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1197 } 1198 } 1199 if (n) { 1200 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1201 } 1202 1203 if (ja) { 1204 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1205 cnt = 0; 1206 for (i=0; i<n; i++) { 1207 for (j=0; j<bs; j++) { 1208 for (k=tia[i]; k<tia[i+1]; k++) { 1209 for (l=0; l<bs; l++) { 1210 (*ja)[cnt++] = bs*tja[k] + l; 1211 } 1212 } 1213 } 1214 } 1215 } 1216 1217 n *= bs; 1218 nz *= bs*bs; 1219 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1220 ierr = PetscFree(tia);CHKERRQ(ierr); 1221 ierr = PetscFree(tja);CHKERRQ(ierr); 1222 } 1223 } else if (oshift == 1) { 1224 if (symmetric) { 1225 PetscInt nz = tia[A->rmap->n/bs]; 1226 /* add 1 to i and j indices */ 1227 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1228 *ia = tia; 1229 if (ja) { 1230 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1231 *ja = tja; 1232 } 1233 } else { 1234 PetscInt nz = a->i[A->rmap->n/bs]; 1235 /* malloc space and add 1 to i and j indices */ 1236 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1237 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1238 if (ja) { 1239 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1240 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1241 } 1242 } 1243 } else { 1244 *ia = tia; 1245 if (ja) *ja = tja; 1246 } 1247 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1253 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1254 { 1255 PetscErrorCode ierr; 1256 1257 PetscFunctionBegin; 1258 if (!ia) PetscFunctionReturn(0); 1259 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1260 ierr = PetscFree(*ia);CHKERRQ(ierr); 1261 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1262 } 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1268 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1269 { 1270 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 #if defined(PETSC_USE_LOG) 1275 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1276 #endif 1277 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1278 if (a->row) { 1279 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1280 } 1281 if (a->col) { 1282 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1283 } 1284 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1285 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1286 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1287 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1288 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1289 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1290 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1291 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1292 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1293 1294 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1295 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1296 ierr = PetscFree(a);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1312 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 switch (op) { 1319 case MAT_ROW_ORIENTED: 1320 a->roworiented = flg; 1321 break; 1322 case MAT_KEEP_NONZERO_PATTERN: 1323 a->keepnonzeropattern = flg; 1324 break; 1325 case MAT_NEW_NONZERO_LOCATIONS: 1326 a->nonew = (flg ? 0 : 1); 1327 break; 1328 case MAT_NEW_NONZERO_LOCATION_ERR: 1329 a->nonew = (flg ? -1 : 0); 1330 break; 1331 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1332 a->nonew = (flg ? -2 : 0); 1333 break; 1334 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1335 a->nounused = (flg ? -1 : 0); 1336 break; 1337 case MAT_NEW_DIAGONALS: 1338 case MAT_IGNORE_OFF_PROC_ENTRIES: 1339 case MAT_USE_HASH_TABLE: 1340 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1341 break; 1342 case MAT_SYMMETRIC: 1343 case MAT_STRUCTURALLY_SYMMETRIC: 1344 case MAT_HERMITIAN: 1345 case MAT_SYMMETRY_ETERNAL: 1346 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1347 break; 1348 default: 1349 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1350 } 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1357 { 1358 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1359 PetscErrorCode ierr; 1360 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1361 MatScalar *aa,*aa_i; 1362 PetscScalar *v_i; 1363 1364 PetscFunctionBegin; 1365 bs = A->rmap->bs; 1366 ai = a->i; 1367 aj = a->j; 1368 aa = a->a; 1369 bs2 = a->bs2; 1370 1371 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1372 1373 bn = row/bs; /* Block number */ 1374 bp = row % bs; /* Block Position */ 1375 M = ai[bn+1] - ai[bn]; 1376 *nz = bs*M; 1377 1378 if (v) { 1379 *v = 0; 1380 if (*nz) { 1381 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1382 for (i=0; i<M; i++) { /* for each block in the block row */ 1383 v_i = *v + i*bs; 1384 aa_i = aa + bs2*(ai[bn] + i); 1385 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1386 } 1387 } 1388 } 1389 1390 if (idx) { 1391 *idx = 0; 1392 if (*nz) { 1393 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1394 for (i=0; i<M; i++) { /* for each block in the block row */ 1395 idx_i = *idx + i*bs; 1396 itmp = bs*aj[ai[bn] + i]; 1397 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1398 } 1399 } 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1406 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1407 { 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1412 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1413 PetscFunctionReturn(0); 1414 } 1415 1416 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1420 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1421 { 1422 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1423 Mat C; 1424 PetscErrorCode ierr; 1425 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1426 PetscInt *rows,*cols,bs2=a->bs2; 1427 MatScalar *array; 1428 1429 PetscFunctionBegin; 1430 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1431 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1432 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1433 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1434 1435 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1436 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1437 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1438 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1439 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1440 ierr = PetscFree(col);CHKERRQ(ierr); 1441 } else { 1442 C = *B; 1443 } 1444 1445 array = a->a; 1446 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1447 for (i=0; i<mbs; i++) { 1448 cols[0] = i*bs; 1449 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1450 len = ai[i+1] - ai[i]; 1451 for (j=0; j<len; j++) { 1452 rows[0] = (*aj++)*bs; 1453 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1454 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1455 array += bs2; 1456 } 1457 } 1458 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1459 1460 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1461 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1462 1463 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1464 *B = C; 1465 } else { 1466 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1478 int fd; 1479 PetscScalar *aa; 1480 FILE *file; 1481 1482 PetscFunctionBegin; 1483 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1484 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1485 col_lens[0] = MAT_FILE_COOKIE; 1486 1487 col_lens[1] = A->rmap->N; 1488 col_lens[2] = A->cmap->n; 1489 col_lens[3] = a->nz*bs2; 1490 1491 /* store lengths of each row and write (including header) to file */ 1492 count = 0; 1493 for (i=0; i<a->mbs; i++) { 1494 for (j=0; j<bs; j++) { 1495 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1496 } 1497 } 1498 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1499 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1500 1501 /* store column indices (zero start index) */ 1502 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1503 count = 0; 1504 for (i=0; i<a->mbs; i++) { 1505 for (j=0; j<bs; j++) { 1506 for (k=a->i[i]; k<a->i[i+1]; k++) { 1507 for (l=0; l<bs; l++) { 1508 jj[count++] = bs*a->j[k] + l; 1509 } 1510 } 1511 } 1512 } 1513 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1514 ierr = PetscFree(jj);CHKERRQ(ierr); 1515 1516 /* store nonzero values */ 1517 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1518 count = 0; 1519 for (i=0; i<a->mbs; i++) { 1520 for (j=0; j<bs; j++) { 1521 for (k=a->i[i]; k<a->i[i+1]; k++) { 1522 for (l=0; l<bs; l++) { 1523 aa[count++] = a->a[bs2*k + l*bs + j]; 1524 } 1525 } 1526 } 1527 } 1528 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1529 ierr = PetscFree(aa);CHKERRQ(ierr); 1530 1531 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1532 if (file) { 1533 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1541 { 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1543 PetscErrorCode ierr; 1544 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1545 PetscViewerFormat format; 1546 1547 PetscFunctionBegin; 1548 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1549 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1550 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1551 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1552 Mat aij; 1553 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1554 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1555 ierr = MatDestroy(aij);CHKERRQ(ierr); 1556 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1557 PetscFunctionReturn(0); 1558 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1560 for (i=0; i<a->mbs; i++) { 1561 for (j=0; j<bs; j++) { 1562 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1563 for (k=a->i[i]; k<a->i[i+1]; k++) { 1564 for (l=0; l<bs; l++) { 1565 #if defined(PETSC_USE_COMPLEX) 1566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1568 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1571 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1572 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } 1575 #else 1576 if (a->a[bs2*k + l*bs + j] != 0.0) { 1577 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1578 } 1579 #endif 1580 } 1581 } 1582 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1583 } 1584 } 1585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1586 } else { 1587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1588 for (i=0; i<a->mbs; i++) { 1589 for (j=0; j<bs; j++) { 1590 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1591 for (k=a->i[i]; k<a->i[i+1]; k++) { 1592 for (l=0; l<bs; l++) { 1593 #if defined(PETSC_USE_COMPLEX) 1594 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1595 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1596 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1597 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1598 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1599 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1600 } else { 1601 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1602 } 1603 #else 1604 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1605 #endif 1606 } 1607 } 1608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1609 } 1610 } 1611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1612 } 1613 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1620 { 1621 Mat A = (Mat) Aa; 1622 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1623 PetscErrorCode ierr; 1624 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1625 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1626 MatScalar *aa; 1627 PetscViewer viewer; 1628 1629 PetscFunctionBegin; 1630 1631 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1632 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1633 1634 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1635 1636 /* loop over matrix elements drawing boxes */ 1637 color = PETSC_DRAW_BLUE; 1638 for (i=0,row=0; i<mbs; i++,row+=bs) { 1639 for (j=a->i[i]; j<a->i[i+1]; j++) { 1640 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1641 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1642 aa = a->a + j*bs2; 1643 for (k=0; k<bs; k++) { 1644 for (l=0; l<bs; l++) { 1645 if (PetscRealPart(*aa++) >= 0.) continue; 1646 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1647 } 1648 } 1649 } 1650 } 1651 color = PETSC_DRAW_CYAN; 1652 for (i=0,row=0; i<mbs; i++,row+=bs) { 1653 for (j=a->i[i]; j<a->i[i+1]; j++) { 1654 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1655 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1656 aa = a->a + j*bs2; 1657 for (k=0; k<bs; k++) { 1658 for (l=0; l<bs; l++) { 1659 if (PetscRealPart(*aa++) != 0.) continue; 1660 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1661 } 1662 } 1663 } 1664 } 1665 1666 color = PETSC_DRAW_RED; 1667 for (i=0,row=0; i<mbs; i++,row+=bs) { 1668 for (j=a->i[i]; j<a->i[i+1]; j++) { 1669 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1670 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1671 aa = a->a + j*bs2; 1672 for (k=0; k<bs; k++) { 1673 for (l=0; l<bs; l++) { 1674 if (PetscRealPart(*aa++) <= 0.) continue; 1675 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1676 } 1677 } 1678 } 1679 } 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1686 { 1687 PetscErrorCode ierr; 1688 PetscReal xl,yl,xr,yr,w,h; 1689 PetscDraw draw; 1690 PetscTruth isnull; 1691 1692 PetscFunctionBegin; 1693 1694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1696 1697 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1698 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1699 xr += w; yr += h; xl = -w; yl = -h; 1700 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1701 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1702 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatView_SeqBAIJ" 1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1709 { 1710 PetscErrorCode ierr; 1711 PetscTruth iascii,isbinary,isdraw; 1712 1713 PetscFunctionBegin; 1714 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1717 if (iascii){ 1718 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1719 } else if (isbinary) { 1720 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1721 } else if (isdraw) { 1722 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1723 } else { 1724 Mat B; 1725 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1726 ierr = MatView(B,viewer);CHKERRQ(ierr); 1727 ierr = MatDestroy(B);CHKERRQ(ierr); 1728 } 1729 PetscFunctionReturn(0); 1730 } 1731 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1736 { 1737 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1738 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1739 PetscInt *ai = a->i,*ailen = a->ilen; 1740 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1741 MatScalar *ap,*aa = a->a; 1742 1743 PetscFunctionBegin; 1744 for (k=0; k<m; k++) { /* loop over rows */ 1745 row = im[k]; brow = row/bs; 1746 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1747 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1748 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1749 nrow = ailen[brow]; 1750 for (l=0; l<n; l++) { /* loop over columns */ 1751 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1752 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1753 col = in[l] ; 1754 bcol = col/bs; 1755 cidx = col%bs; 1756 ridx = row%bs; 1757 high = nrow; 1758 low = 0; /* assume unsorted */ 1759 while (high-low > 5) { 1760 t = (low+high)/2; 1761 if (rp[t] > bcol) high = t; 1762 else low = t; 1763 } 1764 for (i=low; i<high; i++) { 1765 if (rp[i] > bcol) break; 1766 if (rp[i] == bcol) { 1767 *v++ = ap[bs2*i+bs*cidx+ridx]; 1768 goto finished; 1769 } 1770 } 1771 *v++ = 0.0; 1772 finished:; 1773 } 1774 } 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #define CHUNKSIZE 10 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1782 { 1783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1784 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1785 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1786 PetscErrorCode ierr; 1787 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1788 PetscTruth roworiented=a->roworiented; 1789 const PetscScalar *value = v; 1790 MatScalar *ap,*aa = a->a,*bap; 1791 1792 PetscFunctionBegin; 1793 if (roworiented) { 1794 stepval = (n-1)*bs; 1795 } else { 1796 stepval = (m-1)*bs; 1797 } 1798 for (k=0; k<m; k++) { /* loop over added rows */ 1799 row = im[k]; 1800 if (row < 0) continue; 1801 #if defined(PETSC_USE_DEBUG) 1802 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1803 #endif 1804 rp = aj + ai[row]; 1805 ap = aa + bs2*ai[row]; 1806 rmax = imax[row]; 1807 nrow = ailen[row]; 1808 low = 0; 1809 high = nrow; 1810 for (l=0; l<n; l++) { /* loop over added columns */ 1811 if (in[l] < 0) continue; 1812 #if defined(PETSC_USE_DEBUG) 1813 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1814 #endif 1815 col = in[l]; 1816 if (roworiented) { 1817 value = v + k*(stepval+bs)*bs + l*bs; 1818 } else { 1819 value = v + l*(stepval+bs)*bs + k*bs; 1820 } 1821 if (col <= lastcol) low = 0; else high = nrow; 1822 lastcol = col; 1823 while (high-low > 7) { 1824 t = (low+high)/2; 1825 if (rp[t] > col) high = t; 1826 else low = t; 1827 } 1828 for (i=low; i<high; i++) { 1829 if (rp[i] > col) break; 1830 if (rp[i] == col) { 1831 bap = ap + bs2*i; 1832 if (roworiented) { 1833 if (is == ADD_VALUES) { 1834 for (ii=0; ii<bs; ii++,value+=stepval) { 1835 for (jj=ii; jj<bs2; jj+=bs) { 1836 bap[jj] += *value++; 1837 } 1838 } 1839 } else { 1840 for (ii=0; ii<bs; ii++,value+=stepval) { 1841 for (jj=ii; jj<bs2; jj+=bs) { 1842 bap[jj] = *value++; 1843 } 1844 } 1845 } 1846 } else { 1847 if (is == ADD_VALUES) { 1848 for (ii=0; ii<bs; ii++,value+=stepval) { 1849 for (jj=0; jj<bs; jj++) { 1850 *bap++ += *value++; 1851 } 1852 } 1853 } else { 1854 for (ii=0; ii<bs; ii++,value+=stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 *bap++ = *value++; 1857 } 1858 } 1859 } 1860 } 1861 goto noinsert2; 1862 } 1863 } 1864 if (nonew == 1) goto noinsert2; 1865 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1866 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1867 N = nrow++ - 1; high++; 1868 /* shift up all the later entries in this row */ 1869 for (ii=N; ii>=i; ii--) { 1870 rp[ii+1] = rp[ii]; 1871 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1872 } 1873 if (N >= i) { 1874 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1875 } 1876 rp[i] = col; 1877 bap = ap + bs2*i; 1878 if (roworiented) { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=ii; jj<bs2; jj+=bs) { 1881 bap[jj] = *value++; 1882 } 1883 } 1884 } else { 1885 for (ii=0; ii<bs; ii++,value+=stepval) { 1886 for (jj=0; jj<bs; jj++) { 1887 *bap++ = *value++; 1888 } 1889 } 1890 } 1891 noinsert2:; 1892 low = i; 1893 } 1894 ailen[row] = nrow; 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1902 { 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1904 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1905 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1906 PetscErrorCode ierr; 1907 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1908 MatScalar *aa = a->a,*ap; 1909 PetscReal ratio=0.6; 1910 1911 PetscFunctionBegin; 1912 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1913 1914 if (m) rmax = ailen[0]; 1915 for (i=1; i<mbs; i++) { 1916 /* move each row back by the amount of empty slots (fshift) before it*/ 1917 fshift += imax[i-1] - ailen[i-1]; 1918 rmax = PetscMax(rmax,ailen[i]); 1919 if (fshift) { 1920 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1921 N = ailen[i]; 1922 for (j=0; j<N; j++) { 1923 ip[j-fshift] = ip[j]; 1924 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1925 } 1926 } 1927 ai[i] = ai[i-1] + ailen[i-1]; 1928 } 1929 if (mbs) { 1930 fshift += imax[mbs-1] - ailen[mbs-1]; 1931 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1932 } 1933 /* reset ilen and imax for each row */ 1934 for (i=0; i<mbs; i++) { 1935 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1936 } 1937 a->nz = ai[mbs]; 1938 1939 /* diagonals may have moved, so kill the diagonal pointers */ 1940 a->idiagvalid = PETSC_FALSE; 1941 if (fshift && a->diag) { 1942 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1944 a->diag = 0; 1945 } 1946 if (fshift && a->nounused == -1) { 1947 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1948 } 1949 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1950 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1952 a->reallocs = 0; 1953 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1954 1955 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1956 if (a->compressedrow.use){ 1957 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1958 } 1959 1960 A->same_nonzero = PETSC_TRUE; 1961 PetscFunctionReturn(0); 1962 } 1963 1964 /* 1965 This function returns an array of flags which indicate the locations of contiguous 1966 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1967 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1968 Assume: sizes should be long enough to hold all the values. 1969 */ 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1973 { 1974 PetscInt i,j,k,row; 1975 PetscTruth flg; 1976 1977 PetscFunctionBegin; 1978 for (i=0,j=0; i<n; j++) { 1979 row = idx[i]; 1980 if (row%bs!=0) { /* Not the begining of a block */ 1981 sizes[j] = 1; 1982 i++; 1983 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1984 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1985 i++; 1986 } else { /* Begining of the block, so check if the complete block exists */ 1987 flg = PETSC_TRUE; 1988 for (k=1; k<bs; k++) { 1989 if (row+k != idx[i+k]) { /* break in the block */ 1990 flg = PETSC_FALSE; 1991 break; 1992 } 1993 } 1994 if (flg) { /* No break in the bs */ 1995 sizes[j] = bs; 1996 i+= bs; 1997 } else { 1998 sizes[j] = 1; 1999 i++; 2000 } 2001 } 2002 } 2003 *bs_max = j; 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2010 { 2011 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2012 PetscErrorCode ierr; 2013 PetscInt i,j,k,count,*rows; 2014 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2015 PetscScalar zero = 0.0; 2016 MatScalar *aa; 2017 2018 PetscFunctionBegin; 2019 /* Make a copy of the IS and sort it */ 2020 /* allocate memory for rows,sizes */ 2021 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2022 2023 /* copy IS values to rows, and sort them */ 2024 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2025 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2026 if (baij->keepnonzeropattern) { 2027 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2028 bs_max = is_n; 2029 A->same_nonzero = PETSC_TRUE; 2030 } else { 2031 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2032 A->same_nonzero = PETSC_FALSE; 2033 } 2034 2035 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2036 row = rows[j]; 2037 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2038 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2039 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2040 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2041 if (diag != 0.0) { 2042 if (baij->ilen[row/bs] > 0) { 2043 baij->ilen[row/bs] = 1; 2044 baij->j[baij->i[row/bs]] = row/bs; 2045 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2046 } 2047 /* Now insert all the diagonal values for this bs */ 2048 for (k=0; k<bs; k++) { 2049 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2050 } 2051 } else { /* (diag == 0.0) */ 2052 baij->ilen[row/bs] = 0; 2053 } /* end (diag == 0.0) */ 2054 } else { /* (sizes[i] != bs) */ 2055 #if defined (PETSC_USE_DEBUG) 2056 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2057 #endif 2058 for (k=0; k<count; k++) { 2059 aa[0] = zero; 2060 aa += bs; 2061 } 2062 if (diag != 0.0) { 2063 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2064 } 2065 } 2066 } 2067 2068 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2069 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2075 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2076 { 2077 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2078 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2079 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2080 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2081 PetscErrorCode ierr; 2082 PetscInt ridx,cidx,bs2=a->bs2; 2083 PetscTruth roworiented=a->roworiented; 2084 MatScalar *ap,value,*aa=a->a,*bap; 2085 2086 PetscFunctionBegin; 2087 if (v) PetscValidScalarPointer(v,6); 2088 for (k=0; k<m; k++) { /* loop over added rows */ 2089 row = im[k]; 2090 brow = row/bs; 2091 if (row < 0) continue; 2092 #if defined(PETSC_USE_DEBUG) 2093 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2094 #endif 2095 rp = aj + ai[brow]; 2096 ap = aa + bs2*ai[brow]; 2097 rmax = imax[brow]; 2098 nrow = ailen[brow]; 2099 low = 0; 2100 high = nrow; 2101 for (l=0; l<n; l++) { /* loop over added columns */ 2102 if (in[l] < 0) continue; 2103 #if defined(PETSC_USE_DEBUG) 2104 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2105 #endif 2106 col = in[l]; bcol = col/bs; 2107 ridx = row % bs; cidx = col % bs; 2108 if (roworiented) { 2109 value = v[l + k*n]; 2110 } else { 2111 value = v[k + l*m]; 2112 } 2113 if (col <= lastcol) low = 0; else high = nrow; 2114 lastcol = col; 2115 while (high-low > 7) { 2116 t = (low+high)/2; 2117 if (rp[t] > bcol) high = t; 2118 else low = t; 2119 } 2120 for (i=low; i<high; i++) { 2121 if (rp[i] > bcol) break; 2122 if (rp[i] == bcol) { 2123 bap = ap + bs2*i + bs*cidx + ridx; 2124 if (is == ADD_VALUES) *bap += value; 2125 else *bap = value; 2126 goto noinsert1; 2127 } 2128 } 2129 if (nonew == 1) goto noinsert1; 2130 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2131 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2132 N = nrow++ - 1; high++; 2133 /* shift up all the later entries in this row */ 2134 for (ii=N; ii>=i; ii--) { 2135 rp[ii+1] = rp[ii]; 2136 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2137 } 2138 if (N>=i) { 2139 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2140 } 2141 rp[i] = bcol; 2142 ap[bs2*i + bs*cidx + ridx] = value; 2143 a->nz++; 2144 noinsert1:; 2145 low = i; 2146 } 2147 ailen[brow] = nrow; 2148 } 2149 A->same_nonzero = PETSC_FALSE; 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2155 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2156 { 2157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2158 Mat outA; 2159 PetscErrorCode ierr; 2160 PetscTruth row_identity,col_identity; 2161 2162 PetscFunctionBegin; 2163 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2164 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2165 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2166 if (!row_identity || !col_identity) { 2167 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2168 } 2169 2170 outA = inA; 2171 inA->factor = MAT_FACTOR_LU; 2172 2173 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2174 2175 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2176 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2177 a->row = row; 2178 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2179 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2180 a->col = col; 2181 2182 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2183 if (a->icol) { 2184 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2185 } 2186 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2187 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2188 2189 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2190 if (!a->solve_work) { 2191 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2192 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2193 } 2194 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2195 2196 PetscFunctionReturn(0); 2197 } 2198 2199 EXTERN_C_BEGIN 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2203 { 2204 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2205 PetscInt i,nz,mbs; 2206 2207 PetscFunctionBegin; 2208 nz = baij->maxnz/baij->bs2; 2209 mbs = baij->mbs; 2210 for (i=0; i<nz; i++) { 2211 baij->j[i] = indices[i]; 2212 } 2213 baij->nz = nz; 2214 for (i=0; i<mbs; i++) { 2215 baij->ilen[i] = baij->imax[i]; 2216 } 2217 PetscFunctionReturn(0); 2218 } 2219 EXTERN_C_END 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2223 /*@ 2224 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2225 in the matrix. 2226 2227 Input Parameters: 2228 + mat - the SeqBAIJ matrix 2229 - indices - the column indices 2230 2231 Level: advanced 2232 2233 Notes: 2234 This can be called if you have precomputed the nonzero structure of the 2235 matrix and want to provide it to the matrix object to improve the performance 2236 of the MatSetValues() operation. 2237 2238 You MUST have set the correct numbers of nonzeros per row in the call to 2239 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2240 2241 MUST be called before any calls to MatSetValues(); 2242 2243 @*/ 2244 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2245 { 2246 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2247 2248 PetscFunctionBegin; 2249 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2250 PetscValidPointer(indices,2); 2251 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2252 if (f) { 2253 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2254 } else { 2255 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2262 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2263 { 2264 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2265 PetscErrorCode ierr; 2266 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2267 PetscReal atmp; 2268 PetscScalar *x,zero = 0.0; 2269 MatScalar *aa; 2270 PetscInt ncols,brow,krow,kcol; 2271 2272 PetscFunctionBegin; 2273 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2274 bs = A->rmap->bs; 2275 aa = a->a; 2276 ai = a->i; 2277 aj = a->j; 2278 mbs = a->mbs; 2279 2280 ierr = VecSet(v,zero);CHKERRQ(ierr); 2281 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2282 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2283 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2284 for (i=0; i<mbs; i++) { 2285 ncols = ai[1] - ai[0]; ai++; 2286 brow = bs*i; 2287 for (j=0; j<ncols; j++){ 2288 for (kcol=0; kcol<bs; kcol++){ 2289 for (krow=0; krow<bs; krow++){ 2290 atmp = PetscAbsScalar(*aa);aa++; 2291 row = brow + krow; /* row index */ 2292 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2293 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2294 } 2295 } 2296 aj++; 2297 } 2298 } 2299 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatCopy_SeqBAIJ" 2305 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2306 { 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 /* If the two matrices have the same copy implementation, use fast copy. */ 2311 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2312 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2313 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2314 2315 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2316 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2317 } 2318 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2319 } else { 2320 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2321 } 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2327 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2328 { 2329 PetscErrorCode ierr; 2330 2331 PetscFunctionBegin; 2332 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2333 PetscFunctionReturn(0); 2334 } 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2338 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2339 { 2340 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2341 PetscFunctionBegin; 2342 *array = a->a; 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2348 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2349 { 2350 PetscFunctionBegin; 2351 PetscFunctionReturn(0); 2352 } 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2356 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2357 { 2358 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2359 PetscErrorCode ierr; 2360 PetscInt i,bs=Y->rmap->bs,j,bs2; 2361 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2362 2363 PetscFunctionBegin; 2364 if (str == SAME_NONZERO_PATTERN) { 2365 PetscScalar alpha = a; 2366 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2367 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2368 if (y->xtoy && y->XtoY != X) { 2369 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2370 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2371 } 2372 if (!y->xtoy) { /* get xtoy */ 2373 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2374 y->XtoY = X; 2375 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2376 } 2377 bs2 = bs*bs; 2378 for (i=0; i<x->nz; i++) { 2379 j = 0; 2380 while (j < bs2){ 2381 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2382 j++; 2383 } 2384 } 2385 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2386 } else { 2387 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2388 } 2389 PetscFunctionReturn(0); 2390 } 2391 2392 #undef __FUNCT__ 2393 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2394 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2395 { 2396 PetscInt rbs,cbs; 2397 PetscErrorCode ierr; 2398 2399 PetscFunctionBegin; 2400 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2401 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2402 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2403 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2409 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2410 { 2411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2412 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2413 MatScalar *aa = a->a; 2414 2415 PetscFunctionBegin; 2416 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2422 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2423 { 2424 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2425 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2426 MatScalar *aa = a->a; 2427 2428 PetscFunctionBegin; 2429 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2437 /* 2438 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2439 */ 2440 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2441 { 2442 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2443 PetscErrorCode ierr; 2444 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2445 PetscInt nz = a->i[m],row,*jj,mr,col; 2446 2447 PetscFunctionBegin; 2448 *nn = n; 2449 if (!ia) PetscFunctionReturn(0); 2450 if (symmetric) { 2451 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2452 } else { 2453 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2454 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2455 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2456 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2457 jj = a->j; 2458 for (i=0; i<nz; i++) { 2459 collengths[jj[i]]++; 2460 } 2461 cia[0] = oshift; 2462 for (i=0; i<n; i++) { 2463 cia[i+1] = cia[i] + collengths[i]; 2464 } 2465 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2466 jj = a->j; 2467 for (row=0; row<m; row++) { 2468 mr = a->i[row+1] - a->i[row]; 2469 for (i=0; i<mr; i++) { 2470 col = *jj++; 2471 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2472 } 2473 } 2474 ierr = PetscFree(collengths);CHKERRQ(ierr); 2475 *ia = cia; *ja = cja; 2476 } 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2482 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2483 { 2484 PetscErrorCode ierr; 2485 2486 PetscFunctionBegin; 2487 if (!ia) PetscFunctionReturn(0); 2488 ierr = PetscFree(*ia);CHKERRQ(ierr); 2489 ierr = PetscFree(*ja);CHKERRQ(ierr); 2490 PetscFunctionReturn(0); 2491 } 2492 2493 #undef __FUNCT__ 2494 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2495 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2496 { 2497 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2498 PetscErrorCode ierr; 2499 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2500 PetscScalar dx,*y,*xx,*w3_array; 2501 PetscScalar *vscale_array; 2502 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2503 Vec w1=coloring->w1,w2=coloring->w2,w3; 2504 void *fctx = coloring->fctx; 2505 PetscTruth flg = PETSC_FALSE; 2506 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2507 Vec x1_tmp; 2508 2509 PetscFunctionBegin; 2510 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2511 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2512 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2513 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2514 2515 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2516 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2517 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2518 if (flg) { 2519 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2520 } else { 2521 PetscTruth assembled; 2522 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2523 if (assembled) { 2524 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2525 } 2526 } 2527 2528 x1_tmp = x1; 2529 if (!coloring->vscale){ 2530 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2531 } 2532 2533 /* 2534 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2535 coloring->F for the coarser grids from the finest 2536 */ 2537 if (coloring->F) { 2538 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2539 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2540 if (m1 != m2) { 2541 coloring->F = 0; 2542 } 2543 } 2544 2545 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2546 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2547 } 2548 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2549 2550 /* Set w1 = F(x1) */ 2551 if (coloring->F) { 2552 w1 = coloring->F; /* use already computed value of function */ 2553 coloring->F = 0; 2554 } else { 2555 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2556 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2557 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2558 } 2559 2560 if (!coloring->w3) { 2561 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2562 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2563 } 2564 w3 = coloring->w3; 2565 2566 CHKMEMQ; 2567 /* Compute all the local scale factors, including ghost points */ 2568 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2569 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2570 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2571 if (ctype == IS_COLORING_GHOSTED){ 2572 col_start = 0; col_end = N; 2573 } else if (ctype == IS_COLORING_GLOBAL){ 2574 xx = xx - start; 2575 vscale_array = vscale_array - start; 2576 col_start = start; col_end = N + start; 2577 } CHKMEMQ; 2578 for (col=col_start; col<col_end; col++){ 2579 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2580 if (coloring->htype[0] == 'w') { 2581 dx = 1.0 + unorm; 2582 } else { 2583 dx = xx[col]; 2584 } 2585 if (dx == 0.0) dx = 1.0; 2586 #if !defined(PETSC_USE_COMPLEX) 2587 if (dx < umin && dx >= 0.0) dx = umin; 2588 else if (dx < 0.0 && dx > -umin) dx = -umin; 2589 #else 2590 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2591 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2592 #endif 2593 dx *= epsilon; 2594 vscale_array[col] = 1.0/dx; 2595 } CHKMEMQ; 2596 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2597 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2598 if (ctype == IS_COLORING_GLOBAL){ 2599 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2600 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2601 } 2602 CHKMEMQ; 2603 if (coloring->vscaleforrow) { 2604 vscaleforrow = coloring->vscaleforrow; 2605 } else { 2606 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2607 } 2608 2609 2610 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2611 /* 2612 Loop over each color 2613 */ 2614 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2615 for (k=0; k<coloring->ncolors; k++) { 2616 coloring->currentcolor = k; 2617 for (i=0; i<bs; i++) { 2618 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2619 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2620 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2621 /* 2622 Loop over each column associated with color 2623 adding the perturbation to the vector w3. 2624 */ 2625 for (l=0; l<coloring->ncolumns[k]; l++) { 2626 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2627 if (coloring->htype[0] == 'w') { 2628 dx = 1.0 + unorm; 2629 } else { 2630 dx = xx[col]; 2631 } 2632 if (dx == 0.0) dx = 1.0; 2633 #if !defined(PETSC_USE_COMPLEX) 2634 if (dx < umin && dx >= 0.0) dx = umin; 2635 else if (dx < 0.0 && dx > -umin) dx = -umin; 2636 #else 2637 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2638 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2639 #endif 2640 dx *= epsilon; 2641 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2642 w3_array[col] += dx; 2643 } 2644 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2645 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2646 2647 /* 2648 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2649 w2 = F(x1 + dx) - F(x1) 2650 */ 2651 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2652 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2653 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2654 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2655 2656 /* 2657 Loop over rows of vector, putting results into Jacobian matrix 2658 */ 2659 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2660 for (l=0; l<coloring->nrows[k]; l++) { 2661 row = bs*coloring->rows[k][l]; /* local row index */ 2662 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2663 for (j=0; j<bs; j++) { 2664 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2665 srows[j] = row + start + j; 2666 } 2667 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2668 } 2669 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2670 } 2671 } /* endof for each color */ 2672 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2673 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2674 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2675 ierr = PetscFree(srows);CHKERRQ(ierr); 2676 2677 coloring->currentcolor = -1; 2678 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2681 PetscFunctionReturn(0); 2682 } 2683 2684 /* -------------------------------------------------------------------*/ 2685 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2686 MatGetRow_SeqBAIJ, 2687 MatRestoreRow_SeqBAIJ, 2688 MatMult_SeqBAIJ_N, 2689 /* 4*/ MatMultAdd_SeqBAIJ_N, 2690 MatMultTranspose_SeqBAIJ, 2691 MatMultTransposeAdd_SeqBAIJ, 2692 0, 2693 0, 2694 0, 2695 /*10*/ 0, 2696 MatLUFactor_SeqBAIJ, 2697 0, 2698 0, 2699 MatTranspose_SeqBAIJ, 2700 /*15*/ MatGetInfo_SeqBAIJ, 2701 MatEqual_SeqBAIJ, 2702 MatGetDiagonal_SeqBAIJ, 2703 MatDiagonalScale_SeqBAIJ, 2704 MatNorm_SeqBAIJ, 2705 /*20*/ 0, 2706 MatAssemblyEnd_SeqBAIJ, 2707 MatSetOption_SeqBAIJ, 2708 MatZeroEntries_SeqBAIJ, 2709 /*24*/ MatZeroRows_SeqBAIJ, 2710 0, 2711 0, 2712 0, 2713 0, 2714 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2715 0, 2716 0, 2717 MatGetArray_SeqBAIJ, 2718 MatRestoreArray_SeqBAIJ, 2719 /*34*/ MatDuplicate_SeqBAIJ, 2720 0, 2721 0, 2722 MatILUFactor_SeqBAIJ, 2723 0, 2724 /*39*/ MatAXPY_SeqBAIJ, 2725 MatGetSubMatrices_SeqBAIJ, 2726 MatIncreaseOverlap_SeqBAIJ, 2727 MatGetValues_SeqBAIJ, 2728 MatCopy_SeqBAIJ, 2729 /*44*/ 0, 2730 MatScale_SeqBAIJ, 2731 0, 2732 0, 2733 0, 2734 /*49*/ MatSetBlockSize_SeqBAIJ, 2735 MatGetRowIJ_SeqBAIJ, 2736 MatRestoreRowIJ_SeqBAIJ, 2737 MatGetColumnIJ_SeqBAIJ, 2738 MatRestoreColumnIJ_SeqBAIJ, 2739 /*54*/ MatFDColoringCreate_SeqAIJ, 2740 0, 2741 0, 2742 0, 2743 MatSetValuesBlocked_SeqBAIJ, 2744 /*59*/ MatGetSubMatrix_SeqBAIJ, 2745 MatDestroy_SeqBAIJ, 2746 MatView_SeqBAIJ, 2747 0, 2748 0, 2749 /*64*/ 0, 2750 0, 2751 0, 2752 0, 2753 0, 2754 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2755 0, 2756 MatConvert_Basic, 2757 0, 2758 0, 2759 /*74*/ 0, 2760 MatFDColoringApply_BAIJ, 2761 0, 2762 0, 2763 0, 2764 /*79*/ 0, 2765 0, 2766 0, 2767 0, 2768 MatLoad_SeqBAIJ, 2769 /*84*/ 0, 2770 0, 2771 0, 2772 0, 2773 0, 2774 /*89*/ 0, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*94*/ 0, 2780 0, 2781 0, 2782 0, 2783 0, 2784 /*99*/0, 2785 0, 2786 0, 2787 0, 2788 0, 2789 /*104*/0, 2790 MatRealPart_SeqBAIJ, 2791 MatImaginaryPart_SeqBAIJ, 2792 0, 2793 0, 2794 /*109*/0, 2795 0, 2796 0, 2797 0, 2798 MatMissingDiagonal_SeqBAIJ, 2799 /*114*/0, 2800 0, 2801 0, 2802 0, 2803 0, 2804 /*119*/0, 2805 0, 2806 MatMultHermitianTranspose_SeqBAIJ, 2807 MatMultHermitianTransposeAdd_SeqBAIJ 2808 }; 2809 2810 EXTERN_C_BEGIN 2811 #undef __FUNCT__ 2812 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2813 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2814 { 2815 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2816 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2817 PetscErrorCode ierr; 2818 2819 PetscFunctionBegin; 2820 if (aij->nonew != 1) { 2821 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2822 } 2823 2824 /* allocate space for values if not already there */ 2825 if (!aij->saved_values) { 2826 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2827 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2828 } 2829 2830 /* copy values over */ 2831 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 EXTERN_C_END 2835 2836 EXTERN_C_BEGIN 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2839 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2840 { 2841 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2842 PetscErrorCode ierr; 2843 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2844 2845 PetscFunctionBegin; 2846 if (aij->nonew != 1) { 2847 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2848 } 2849 if (!aij->saved_values) { 2850 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2851 } 2852 2853 /* copy values over */ 2854 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857 EXTERN_C_END 2858 2859 EXTERN_C_BEGIN 2860 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2862 EXTERN_C_END 2863 2864 EXTERN_C_BEGIN 2865 #undef __FUNCT__ 2866 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2867 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2868 { 2869 Mat_SeqBAIJ *b; 2870 PetscErrorCode ierr; 2871 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2872 PetscTruth flg,skipallocation = PETSC_FALSE; 2873 2874 PetscFunctionBegin; 2875 2876 if (nz == MAT_SKIP_ALLOCATION) { 2877 skipallocation = PETSC_TRUE; 2878 nz = 0; 2879 } 2880 2881 if (bs < 0) { 2882 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2883 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2884 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2885 bs = PetscAbs(bs); 2886 } 2887 if (nnz && newbs != bs) { 2888 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2889 } 2890 bs = newbs; 2891 2892 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2893 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2894 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2895 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2896 2897 B->preallocated = PETSC_TRUE; 2898 2899 mbs = B->rmap->n/bs; 2900 nbs = B->cmap->n/bs; 2901 bs2 = bs*bs; 2902 2903 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2904 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2905 } 2906 2907 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2908 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2909 if (nnz) { 2910 for (i=0; i<mbs; i++) { 2911 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2912 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2913 } 2914 } 2915 2916 b = (Mat_SeqBAIJ*)B->data; 2917 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2918 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2919 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2920 2921 if (!flg) { 2922 switch (bs) { 2923 case 1: 2924 B->ops->mult = MatMult_SeqBAIJ_1; 2925 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2926 B->ops->sor = MatSOR_SeqBAIJ_1; 2927 break; 2928 case 2: 2929 B->ops->mult = MatMult_SeqBAIJ_2; 2930 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2931 B->ops->sor = MatSOR_SeqBAIJ_2; 2932 break; 2933 case 3: 2934 B->ops->mult = MatMult_SeqBAIJ_3; 2935 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2936 B->ops->sor = MatSOR_SeqBAIJ_3; 2937 break; 2938 case 4: 2939 B->ops->mult = MatMult_SeqBAIJ_4; 2940 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2941 B->ops->sor = MatSOR_SeqBAIJ_4; 2942 break; 2943 case 5: 2944 B->ops->mult = MatMult_SeqBAIJ_5; 2945 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2946 B->ops->sor = MatSOR_SeqBAIJ_5; 2947 break; 2948 case 6: 2949 B->ops->mult = MatMult_SeqBAIJ_6; 2950 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2951 B->ops->sor = MatSOR_SeqBAIJ_6; 2952 break; 2953 case 7: 2954 B->ops->mult = MatMult_SeqBAIJ_7; 2955 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2956 B->ops->sor = MatSOR_SeqBAIJ_7; 2957 break; 2958 case 15: 2959 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2960 break; 2961 default: 2962 B->ops->mult = MatMult_SeqBAIJ_N; 2963 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2964 break; 2965 } 2966 } 2967 B->rmap->bs = bs; 2968 b->mbs = mbs; 2969 b->nbs = nbs; 2970 if (!skipallocation) { 2971 if (!b->imax) { 2972 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2973 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2974 b->free_imax_ilen = PETSC_TRUE; 2975 } 2976 /* b->ilen will count nonzeros in each block row so far. */ 2977 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2978 if (!nnz) { 2979 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2980 else if (nz <= 0) nz = 1; 2981 for (i=0; i<mbs; i++) b->imax[i] = nz; 2982 nz = nz*mbs; 2983 } else { 2984 nz = 0; 2985 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2986 } 2987 2988 /* allocate the matrix space */ 2989 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2990 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2991 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2992 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2993 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2994 b->singlemalloc = PETSC_TRUE; 2995 b->i[0] = 0; 2996 for (i=1; i<mbs+1; i++) { 2997 b->i[i] = b->i[i-1] + b->imax[i-1]; 2998 } 2999 b->free_a = PETSC_TRUE; 3000 b->free_ij = PETSC_TRUE; 3001 } else { 3002 b->free_a = PETSC_FALSE; 3003 b->free_ij = PETSC_FALSE; 3004 } 3005 3006 B->rmap->bs = bs; 3007 b->bs2 = bs2; 3008 b->mbs = mbs; 3009 b->nz = 0; 3010 b->maxnz = nz*bs2; 3011 B->info.nz_unneeded = (PetscReal)b->maxnz; 3012 PetscFunctionReturn(0); 3013 } 3014 EXTERN_C_END 3015 3016 EXTERN_C_BEGIN 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3019 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3020 { 3021 PetscInt i,m,nz,nz_max=0,*nnz; 3022 PetscScalar *values=0; 3023 PetscErrorCode ierr; 3024 3025 PetscFunctionBegin; 3026 3027 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3028 3029 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3030 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3031 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3032 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3033 m = B->rmap->n/bs; 3034 3035 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3036 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3037 for(i=0; i<m; i++) { 3038 nz = ii[i+1]- ii[i]; 3039 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3040 nz_max = PetscMax(nz_max, nz); 3041 nnz[i] = nz; 3042 } 3043 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3044 ierr = PetscFree(nnz);CHKERRQ(ierr); 3045 3046 values = (PetscScalar*)V; 3047 if (!values) { 3048 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3049 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3050 } 3051 for (i=0; i<m; i++) { 3052 PetscInt ncols = ii[i+1] - ii[i]; 3053 const PetscInt *icols = jj + ii[i]; 3054 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3055 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3056 } 3057 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3058 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3059 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 3061 PetscFunctionReturn(0); 3062 } 3063 EXTERN_C_END 3064 3065 3066 EXTERN_C_BEGIN 3067 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3068 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3069 EXTERN_C_END 3070 3071 /*MC 3072 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3073 block sparse compressed row format. 3074 3075 Options Database Keys: 3076 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3077 3078 Level: beginner 3079 3080 .seealso: MatCreateSeqBAIJ() 3081 M*/ 3082 3083 3084 EXTERN_C_BEGIN 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatCreate_SeqBAIJ" 3087 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3088 { 3089 PetscErrorCode ierr; 3090 PetscMPIInt size; 3091 Mat_SeqBAIJ *b; 3092 3093 PetscFunctionBegin; 3094 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3095 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3096 3097 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3098 B->data = (void*)b; 3099 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3100 B->mapping = 0; 3101 b->row = 0; 3102 b->col = 0; 3103 b->icol = 0; 3104 b->reallocs = 0; 3105 b->saved_values = 0; 3106 3107 b->roworiented = PETSC_TRUE; 3108 b->nonew = 0; 3109 b->diag = 0; 3110 b->solve_work = 0; 3111 b->mult_work = 0; 3112 B->spptr = 0; 3113 B->info.nz_unneeded = (PetscReal)b->maxnz; 3114 b->keepnonzeropattern = PETSC_FALSE; 3115 b->xtoy = 0; 3116 b->XtoY = 0; 3117 b->compressedrow.use = PETSC_FALSE; 3118 b->compressedrow.nrows = 0; 3119 b->compressedrow.i = PETSC_NULL; 3120 b->compressedrow.rindex = PETSC_NULL; 3121 b->compressedrow.checked = PETSC_FALSE; 3122 B->same_nonzero = PETSC_FALSE; 3123 3124 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3125 "MatGetFactorAvailable_seqbaij_petsc", 3126 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3127 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3128 "MatGetFactor_seqbaij_petsc", 3129 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3130 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3131 "MatInvertBlockDiagonal_SeqBAIJ", 3132 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3133 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3134 "MatStoreValues_SeqBAIJ", 3135 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3136 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3137 "MatRetrieveValues_SeqBAIJ", 3138 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3139 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3140 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3141 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3142 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3143 "MatConvert_SeqBAIJ_SeqAIJ", 3144 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3145 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3146 "MatConvert_SeqBAIJ_SeqSBAIJ", 3147 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3148 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3149 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3150 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3151 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3152 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3153 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3154 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3155 PetscFunctionReturn(0); 3156 } 3157 EXTERN_C_END 3158 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3161 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3162 { 3163 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3164 PetscErrorCode ierr; 3165 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3166 3167 PetscFunctionBegin; 3168 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3169 3170 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3171 c->imax = a->imax; 3172 c->ilen = a->ilen; 3173 c->free_imax_ilen = PETSC_FALSE; 3174 } else { 3175 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3176 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3177 for (i=0; i<mbs; i++) { 3178 c->imax[i] = a->imax[i]; 3179 c->ilen[i] = a->ilen[i]; 3180 } 3181 c->free_imax_ilen = PETSC_TRUE; 3182 } 3183 3184 /* allocate the matrix space */ 3185 if (mallocmatspace){ 3186 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3187 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3188 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3189 c->singlemalloc = PETSC_FALSE; 3190 c->free_ij = PETSC_FALSE; 3191 c->i = a->i; 3192 c->j = a->j; 3193 c->parent = A; 3194 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3195 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3196 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3197 } else { 3198 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3199 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3200 c->singlemalloc = PETSC_TRUE; 3201 c->free_ij = PETSC_TRUE; 3202 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3203 if (mbs > 0) { 3204 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3205 if (cpvalues == MAT_COPY_VALUES) { 3206 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3207 } else { 3208 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3209 } 3210 } 3211 } 3212 } 3213 3214 c->roworiented = a->roworiented; 3215 c->nonew = a->nonew; 3216 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3217 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3218 c->bs2 = a->bs2; 3219 c->mbs = a->mbs; 3220 c->nbs = a->nbs; 3221 3222 if (a->diag) { 3223 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3224 c->diag = a->diag; 3225 c->free_diag = PETSC_FALSE; 3226 } else { 3227 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3228 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3229 for (i=0; i<mbs; i++) { 3230 c->diag[i] = a->diag[i]; 3231 } 3232 c->free_diag = PETSC_TRUE; 3233 } 3234 } else c->diag = 0; 3235 c->nz = a->nz; 3236 c->maxnz = a->maxnz; 3237 c->solve_work = 0; 3238 c->mult_work = 0; 3239 c->free_a = PETSC_TRUE; 3240 c->free_ij = PETSC_TRUE; 3241 C->preallocated = PETSC_TRUE; 3242 C->assembled = PETSC_TRUE; 3243 3244 c->compressedrow.use = a->compressedrow.use; 3245 c->compressedrow.nrows = a->compressedrow.nrows; 3246 c->compressedrow.checked = a->compressedrow.checked; 3247 if (a->compressedrow.checked && a->compressedrow.use){ 3248 i = a->compressedrow.nrows; 3249 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3250 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3251 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3252 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3253 } else { 3254 c->compressedrow.use = PETSC_FALSE; 3255 c->compressedrow.i = PETSC_NULL; 3256 c->compressedrow.rindex = PETSC_NULL; 3257 } 3258 C->same_nonzero = A->same_nonzero; 3259 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3260 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3261 PetscFunctionReturn(0); 3262 } 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3266 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3267 { 3268 PetscErrorCode ierr; 3269 3270 PetscFunctionBegin; 3271 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3272 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3273 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3274 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3275 PetscFunctionReturn(0); 3276 } 3277 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatLoad_SeqBAIJ" 3280 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3281 { 3282 Mat_SeqBAIJ *a; 3283 Mat B; 3284 PetscErrorCode ierr; 3285 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3286 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3287 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3288 PetscInt *masked,nmask,tmp,bs2,ishift; 3289 PetscMPIInt size; 3290 int fd; 3291 PetscScalar *aa; 3292 MPI_Comm comm = ((PetscObject)viewer)->comm; 3293 3294 PetscFunctionBegin; 3295 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3296 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3297 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3298 bs2 = bs*bs; 3299 3300 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3301 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3302 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3303 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3304 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3305 M = header[1]; N = header[2]; nz = header[3]; 3306 3307 if (header[3] < 0) { 3308 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3309 } 3310 3311 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3312 3313 /* 3314 This code adds extra rows to make sure the number of rows is 3315 divisible by the blocksize 3316 */ 3317 mbs = M/bs; 3318 extra_rows = bs - M + bs*(mbs); 3319 if (extra_rows == bs) extra_rows = 0; 3320 else mbs++; 3321 if (extra_rows) { 3322 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3323 } 3324 3325 /* read in row lengths */ 3326 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3327 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3328 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3329 3330 /* read in column indices */ 3331 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3332 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3333 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3334 3335 /* loop over row lengths determining block row lengths */ 3336 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3337 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3338 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3339 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3340 rowcount = 0; 3341 nzcount = 0; 3342 for (i=0; i<mbs; i++) { 3343 nmask = 0; 3344 for (j=0; j<bs; j++) { 3345 kmax = rowlengths[rowcount]; 3346 for (k=0; k<kmax; k++) { 3347 tmp = jj[nzcount++]/bs; 3348 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3349 } 3350 rowcount++; 3351 } 3352 browlengths[i] += nmask; 3353 /* zero out the mask elements we set */ 3354 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3355 } 3356 3357 /* create our matrix */ 3358 ierr = MatCreate(comm,&B); 3359 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3360 ierr = MatSetType(B,type);CHKERRQ(ierr); 3361 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3362 a = (Mat_SeqBAIJ*)B->data; 3363 3364 /* set matrix "i" values */ 3365 a->i[0] = 0; 3366 for (i=1; i<= mbs; i++) { 3367 a->i[i] = a->i[i-1] + browlengths[i-1]; 3368 a->ilen[i-1] = browlengths[i-1]; 3369 } 3370 a->nz = 0; 3371 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3372 3373 /* read in nonzero values */ 3374 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3375 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3376 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3377 3378 /* set "a" and "j" values into matrix */ 3379 nzcount = 0; jcount = 0; 3380 for (i=0; i<mbs; i++) { 3381 nzcountb = nzcount; 3382 nmask = 0; 3383 for (j=0; j<bs; j++) { 3384 kmax = rowlengths[i*bs+j]; 3385 for (k=0; k<kmax; k++) { 3386 tmp = jj[nzcount++]/bs; 3387 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3388 } 3389 } 3390 /* sort the masked values */ 3391 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3392 3393 /* set "j" values into matrix */ 3394 maskcount = 1; 3395 for (j=0; j<nmask; j++) { 3396 a->j[jcount++] = masked[j]; 3397 mask[masked[j]] = maskcount++; 3398 } 3399 /* set "a" values into matrix */ 3400 ishift = bs2*a->i[i]; 3401 for (j=0; j<bs; j++) { 3402 kmax = rowlengths[i*bs+j]; 3403 for (k=0; k<kmax; k++) { 3404 tmp = jj[nzcountb]/bs ; 3405 block = mask[tmp] - 1; 3406 point = jj[nzcountb] - bs*tmp; 3407 idx = ishift + bs2*block + j + bs*point; 3408 a->a[idx] = (MatScalar)aa[nzcountb++]; 3409 } 3410 } 3411 /* zero out the mask elements we set */ 3412 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3413 } 3414 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3415 3416 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3417 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3418 ierr = PetscFree(aa);CHKERRQ(ierr); 3419 ierr = PetscFree(jj);CHKERRQ(ierr); 3420 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3421 3422 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3423 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3424 ierr = MatView_Private(B);CHKERRQ(ierr); 3425 3426 *A = B; 3427 PetscFunctionReturn(0); 3428 } 3429 3430 #undef __FUNCT__ 3431 #define __FUNCT__ "MatCreateSeqBAIJ" 3432 /*@C 3433 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3434 compressed row) format. For good matrix assembly performance the 3435 user should preallocate the matrix storage by setting the parameter nz 3436 (or the array nnz). By setting these parameters accurately, performance 3437 during matrix assembly can be increased by more than a factor of 50. 3438 3439 Collective on MPI_Comm 3440 3441 Input Parameters: 3442 + comm - MPI communicator, set to PETSC_COMM_SELF 3443 . bs - size of block 3444 . m - number of rows 3445 . n - number of columns 3446 . nz - number of nonzero blocks per block row (same for all rows) 3447 - nnz - array containing the number of nonzero blocks in the various block rows 3448 (possibly different for each block row) or PETSC_NULL 3449 3450 Output Parameter: 3451 . A - the matrix 3452 3453 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3454 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3455 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3456 3457 Options Database Keys: 3458 . -mat_no_unroll - uses code that does not unroll the loops in the 3459 block calculations (much slower) 3460 . -mat_block_size - size of the blocks to use 3461 3462 Level: intermediate 3463 3464 Notes: 3465 The number of rows and columns must be divisible by blocksize. 3466 3467 If the nnz parameter is given then the nz parameter is ignored 3468 3469 A nonzero block is any block that as 1 or more nonzeros in it 3470 3471 The block AIJ format is fully compatible with standard Fortran 77 3472 storage. That is, the stored row and column indices can begin at 3473 either one (as in Fortran) or zero. See the users' manual for details. 3474 3475 Specify the preallocated storage with either nz or nnz (not both). 3476 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3477 allocation. For additional details, see the users manual chapter on 3478 matrices. 3479 3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3481 @*/ 3482 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3483 { 3484 PetscErrorCode ierr; 3485 3486 PetscFunctionBegin; 3487 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3488 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3489 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3490 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3496 /*@C 3497 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3498 per row in the matrix. For good matrix assembly performance the 3499 user should preallocate the matrix storage by setting the parameter nz 3500 (or the array nnz). By setting these parameters accurately, performance 3501 during matrix assembly can be increased by more than a factor of 50. 3502 3503 Collective on MPI_Comm 3504 3505 Input Parameters: 3506 + A - the matrix 3507 . bs - size of block 3508 . nz - number of block nonzeros per block row (same for all rows) 3509 - nnz - array containing the number of block nonzeros in the various block rows 3510 (possibly different for each block row) or PETSC_NULL 3511 3512 Options Database Keys: 3513 . -mat_no_unroll - uses code that does not unroll the loops in the 3514 block calculations (much slower) 3515 . -mat_block_size - size of the blocks to use 3516 3517 Level: intermediate 3518 3519 Notes: 3520 If the nnz parameter is given then the nz parameter is ignored 3521 3522 You can call MatGetInfo() to get information on how effective the preallocation was; 3523 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3524 You can also run with the option -info and look for messages with the string 3525 malloc in them to see if additional memory allocation was needed. 3526 3527 The block AIJ format is fully compatible with standard Fortran 77 3528 storage. That is, the stored row and column indices can begin at 3529 either one (as in Fortran) or zero. See the users' manual for details. 3530 3531 Specify the preallocated storage with either nz or nnz (not both). 3532 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3533 allocation. For additional details, see the users manual chapter on 3534 matrices. 3535 3536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3537 @*/ 3538 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3539 { 3540 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3541 3542 PetscFunctionBegin; 3543 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3544 if (f) { 3545 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3546 } 3547 PetscFunctionReturn(0); 3548 } 3549 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3552 /*@C 3553 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3554 (the default sequential PETSc format). 3555 3556 Collective on MPI_Comm 3557 3558 Input Parameters: 3559 + A - the matrix 3560 . i - the indices into j for the start of each local row (starts with zero) 3561 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3562 - v - optional values in the matrix 3563 3564 Level: developer 3565 3566 .keywords: matrix, aij, compressed row, sparse 3567 3568 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3569 @*/ 3570 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3571 { 3572 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3573 3574 PetscFunctionBegin; 3575 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3576 if (f) { 3577 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3578 } 3579 PetscFunctionReturn(0); 3580 } 3581 3582 3583 #undef __FUNCT__ 3584 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3585 /*@ 3586 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3587 (upper triangular entries in CSR format) provided by the user. 3588 3589 Collective on MPI_Comm 3590 3591 Input Parameters: 3592 + comm - must be an MPI communicator of size 1 3593 . bs - size of block 3594 . m - number of rows 3595 . n - number of columns 3596 . i - row indices 3597 . j - column indices 3598 - a - matrix values 3599 3600 Output Parameter: 3601 . mat - the matrix 3602 3603 Level: intermediate 3604 3605 Notes: 3606 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3607 once the matrix is destroyed 3608 3609 You cannot set new nonzero locations into this matrix, that will generate an error. 3610 3611 The i and j indices are 0 based 3612 3613 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3614 3615 @*/ 3616 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3617 { 3618 PetscErrorCode ierr; 3619 PetscInt ii; 3620 Mat_SeqBAIJ *baij; 3621 3622 PetscFunctionBegin; 3623 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3624 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3625 3626 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3627 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3628 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3629 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3630 baij = (Mat_SeqBAIJ*)(*mat)->data; 3631 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3632 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3633 3634 baij->i = i; 3635 baij->j = j; 3636 baij->a = a; 3637 baij->singlemalloc = PETSC_FALSE; 3638 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3639 baij->free_a = PETSC_FALSE; 3640 baij->free_ij = PETSC_FALSE; 3641 3642 for (ii=0; ii<m; ii++) { 3643 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3644 #if defined(PETSC_USE_DEBUG) 3645 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3646 #endif 3647 } 3648 #if defined(PETSC_USE_DEBUG) 3649 for (ii=0; ii<baij->i[m]; ii++) { 3650 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3651 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3652 } 3653 #endif 3654 3655 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 PetscFunctionReturn(0); 3658 } 3659