1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 1: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 1*diag_offset[i]; 68 diag[0] = odiag[0]; 69 mdiag[0] = odiag[0]; 70 diag[0] = 1.0 / (diag[0] + shift); 71 diag += 1; 72 mdiag += 1; 73 } 74 break; 75 case 2: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 4*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 81 diag += 4; 82 mdiag += 4; 83 } 84 break; 85 case 3: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 9*diag_offset[i]; 88 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 89 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 90 diag[8] = odiag[8]; 91 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 92 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 93 mdiag[8] = odiag[8]; 94 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 95 diag += 9; 96 mdiag += 9; 97 } 98 break; 99 case 4: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 16*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 105 diag += 16; 106 mdiag += 16; 107 } 108 break; 109 case 5: 110 for (i=0; i<mbs; i++) { 111 odiag = v + 25*diag_offset[i]; 112 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 115 diag += 25; 116 mdiag += 25; 117 } 118 break; 119 case 6: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 36*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 125 diag += 36; 126 mdiag += 36; 127 } 128 break; 129 default: 130 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 EXTERN_C_END 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 139 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 140 { 141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 142 PetscScalar *x,x1,s1; 143 const PetscScalar *b; 144 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 145 PetscErrorCode ierr; 146 PetscInt m = a->mbs,i,i2,nz,j; 147 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 148 149 PetscFunctionBegin; 150 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 151 its = its*lits; 152 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 153 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 154 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 155 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 156 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 157 158 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 159 160 diag = a->diag; 161 idiag = a->idiag; 162 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 163 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 164 165 if (flag & SOR_ZERO_INITIAL_GUESS) { 166 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 167 x[0] = b[0]*idiag[0]; 168 i2 = 1; 169 idiag += 1; 170 for (i=1; i<m; i++) { 171 v = aa + ai[i]; 172 vi = aj + ai[i]; 173 nz = diag[i] - ai[i]; 174 s1 = b[i2]; 175 for (j=0; j<nz; j++) { 176 s1 -= v[j]*x[vi[j]]; 177 } 178 x[i2] = idiag[0]*s1; 179 idiag += 1; 180 i2 += 1; 181 } 182 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 183 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 184 } 185 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 186 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 187 i2 = 0; 188 mdiag = a->idiag+a->mbs; 189 for (i=0; i<m; i++) { 190 x1 = x[i2]; 191 x[i2] = mdiag[0]*x1; 192 mdiag += 1; 193 i2 += 1; 194 } 195 ierr = PetscLogFlops(m);CHKERRQ(ierr); 196 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 197 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 198 } 199 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 200 idiag = a->idiag+a->mbs - 1; 201 i2 = m - 1; 202 x1 = x[i2]; 203 x[i2] = idiag[0]*x1; 204 idiag -= 1; 205 i2 -= 1; 206 for (i=m-2; i>=0; i--) { 207 v = aa + (diag[i]+1); 208 vi = aj + diag[i] + 1; 209 nz = ai[i+1] - diag[i] - 1; 210 s1 = x[i2]; 211 for (j=0; j<nz; j++) { 212 s1 -= v[j]*x[vi[j]]; 213 } 214 x[i2] = idiag[0]*s1; 215 idiag -= 1; 216 i2 -= 1; 217 } 218 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 219 } 220 } else { 221 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 222 } 223 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 224 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 #undef __FUNCT__ 229 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 230 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 231 { 232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 233 PetscScalar *x,x1,x2,s1,s2; 234 const PetscScalar *b; 235 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 236 PetscErrorCode ierr; 237 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 238 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 239 240 PetscFunctionBegin; 241 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 242 its = its*lits; 243 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 244 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 245 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 246 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 247 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 248 249 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 250 251 diag = a->diag; 252 idiag = a->idiag; 253 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 254 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 255 256 if (flag & SOR_ZERO_INITIAL_GUESS) { 257 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 258 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 259 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 260 i2 = 2; 261 idiag += 4; 262 for (i=1; i<m; i++) { 263 v = aa + 4*ai[i]; 264 vi = aj + ai[i]; 265 nz = diag[i] - ai[i]; 266 s1 = b[i2]; s2 = b[i2+1]; 267 for (j=0; j<nz; j++) { 268 idx = 2*vi[j]; 269 it = 4*j; 270 x1 = x[idx]; x2 = x[1+idx]; 271 s1 -= v[it]*x1 + v[it+2]*x2; 272 s2 -= v[it+1]*x1 + v[it+3]*x2; 273 } 274 x[i2] = idiag[0]*s1 + idiag[2]*s2; 275 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 276 idiag += 4; 277 i2 += 2; 278 } 279 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 280 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 281 } 282 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 283 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 284 i2 = 0; 285 mdiag = a->idiag+4*a->mbs; 286 for (i=0; i<m; i++) { 287 x1 = x[i2]; x2 = x[i2+1]; 288 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 289 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 290 mdiag += 4; 291 i2 += 2; 292 } 293 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 294 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 295 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 296 } 297 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 298 idiag = a->idiag+4*a->mbs - 4; 299 i2 = 2*m - 2; 300 x1 = x[i2]; x2 = x[i2+1]; 301 x[i2] = idiag[0]*x1 + idiag[2]*x2; 302 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 303 idiag -= 4; 304 i2 -= 2; 305 for (i=m-2; i>=0; i--) { 306 v = aa + 4*(diag[i]+1); 307 vi = aj + diag[i] + 1; 308 nz = ai[i+1] - diag[i] - 1; 309 s1 = x[i2]; s2 = x[i2+1]; 310 for (j=0; j<nz; j++) { 311 idx = 2*vi[j]; 312 it = 4*j; 313 x1 = x[idx]; x2 = x[1+idx]; 314 s1 -= v[it]*x1 + v[it+2]*x2; 315 s2 -= v[it+1]*x1 + v[it+3]*x2; 316 } 317 x[i2] = idiag[0]*s1 + idiag[2]*s2; 318 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 319 idiag -= 4; 320 i2 -= 2; 321 } 322 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 323 } 324 } else { 325 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 326 } 327 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 328 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 329 PetscFunctionReturn(0); 330 } 331 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 334 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 335 { 336 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 337 PetscScalar *x,x1,x2,x3,s1,s2,s3; 338 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 339 const PetscScalar *b; 340 PetscErrorCode ierr; 341 PetscInt m = a->mbs,i,i2,nz,idx; 342 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 343 344 PetscFunctionBegin; 345 its = its*lits; 346 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 347 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 348 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 349 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 350 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 351 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 352 353 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 354 355 diag = a->diag; 356 idiag = a->idiag; 357 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 358 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 359 360 if (flag & SOR_ZERO_INITIAL_GUESS) { 361 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 362 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 363 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 364 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 365 i2 = 3; 366 idiag += 9; 367 for (i=1; i<m; i++) { 368 v = aa + 9*ai[i]; 369 vi = aj + ai[i]; 370 nz = diag[i] - ai[i]; 371 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 372 while (nz--) { 373 idx = 3*(*vi++); 374 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 375 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 376 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 377 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 378 v += 9; 379 } 380 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 381 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 382 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 383 idiag += 9; 384 i2 += 3; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 388 } 389 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 390 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 391 i2 = 0; 392 mdiag = a->idiag+9*a->mbs; 393 for (i=0; i<m; i++) { 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 396 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 397 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 398 mdiag += 9; 399 i2 += 3; 400 } 401 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 402 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 403 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 404 } 405 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 406 idiag = a->idiag+9*a->mbs - 9; 407 i2 = 3*m - 3; 408 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 409 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 410 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 411 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 412 idiag -= 9; 413 i2 -= 3; 414 for (i=m-2; i>=0; i--) { 415 v = aa + 9*(diag[i]+1); 416 vi = aj + diag[i] + 1; 417 nz = ai[i+1] - diag[i] - 1; 418 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 419 while (nz--) { 420 idx = 3*(*vi++); 421 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 422 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 423 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 424 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 425 v += 9; 426 } 427 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 428 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 429 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 430 idiag -= 9; 431 i2 -= 3; 432 } 433 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 445 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 449 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 450 const PetscScalar *b; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 457 its = its*lits; 458 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 459 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 460 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 461 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 462 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 463 464 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 465 466 diag = a->diag; 467 idiag = a->idiag; 468 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 469 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 470 471 if (flag & SOR_ZERO_INITIAL_GUESS) { 472 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 473 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 474 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 475 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 476 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 477 i2 = 4; 478 idiag += 16; 479 for (i=1; i<m; i++) { 480 v = aa + 16*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 484 while (nz--) { 485 idx = 4*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 487 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 488 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 489 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 490 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 491 v += 16; 492 } 493 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 494 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 495 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 496 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 497 idiag += 16; 498 i2 += 4; 499 } 500 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 501 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 502 } 503 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 504 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 505 i2 = 0; 506 mdiag = a->idiag+16*a->mbs; 507 for (i=0; i<m; i++) { 508 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 509 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 510 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 511 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 512 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 513 mdiag += 16; 514 i2 += 4; 515 } 516 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 517 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 518 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 519 } 520 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 521 idiag = a->idiag+16*a->mbs - 16; 522 i2 = 4*m - 4; 523 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 524 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 525 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 526 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 527 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 528 idiag -= 16; 529 i2 -= 4; 530 for (i=m-2; i>=0; i--) { 531 v = aa + 16*(diag[i]+1); 532 vi = aj + diag[i] + 1; 533 nz = ai[i+1] - diag[i] - 1; 534 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 535 while (nz--) { 536 idx = 4*(*vi++); 537 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 538 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 539 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 540 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 541 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 542 v += 16; 543 } 544 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 545 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 546 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 547 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 548 idiag -= 16; 549 i2 -= 4; 550 } 551 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 552 } 553 } else { 554 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 555 } 556 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 557 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 563 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 564 { 565 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 566 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 567 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 568 const PetscScalar *b; 569 PetscErrorCode ierr; 570 PetscInt m = a->mbs,i,i2,nz,idx; 571 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 572 573 PetscFunctionBegin; 574 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 575 its = its*lits; 576 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 577 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 578 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 579 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 580 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 581 582 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 583 584 diag = a->diag; 585 idiag = a->idiag; 586 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 587 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 588 589 if (flag & SOR_ZERO_INITIAL_GUESS) { 590 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 591 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 592 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 593 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 594 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 595 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 596 i2 = 5; 597 idiag += 25; 598 for (i=1; i<m; i++) { 599 v = aa + 25*ai[i]; 600 vi = aj + ai[i]; 601 nz = diag[i] - ai[i]; 602 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 603 while (nz--) { 604 idx = 5*(*vi++); 605 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 606 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 607 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 608 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 609 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 610 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 611 v += 25; 612 } 613 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 614 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 615 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 616 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 617 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 618 idiag += 25; 619 i2 += 5; 620 } 621 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 622 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 623 } 624 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 625 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 626 i2 = 0; 627 mdiag = a->idiag+25*a->mbs; 628 for (i=0; i<m; i++) { 629 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 630 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 631 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 632 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 633 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 634 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 635 mdiag += 25; 636 i2 += 5; 637 } 638 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 639 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 640 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 641 } 642 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 643 idiag = a->idiag+25*a->mbs - 25; 644 i2 = 5*m - 5; 645 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 646 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 647 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 648 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 649 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 650 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 651 idiag -= 25; 652 i2 -= 5; 653 for (i=m-2; i>=0; i--) { 654 v = aa + 25*(diag[i]+1); 655 vi = aj + diag[i] + 1; 656 nz = ai[i+1] - diag[i] - 1; 657 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 658 while (nz--) { 659 idx = 5*(*vi++); 660 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 661 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 662 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 663 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 664 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 665 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 666 v += 25; 667 } 668 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 669 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 670 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 671 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 672 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 673 idiag -= 25; 674 i2 -= 5; 675 } 676 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 677 } 678 } else { 679 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 680 } 681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 682 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 688 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 692 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 693 const PetscScalar *b; 694 PetscErrorCode ierr; 695 PetscInt m = a->mbs,i,i2,nz,idx; 696 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 697 698 PetscFunctionBegin; 699 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 700 its = its*lits; 701 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 702 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 703 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 704 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 705 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 706 707 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 708 709 diag = a->diag; 710 idiag = a->idiag; 711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 712 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 713 714 if (flag & SOR_ZERO_INITIAL_GUESS) { 715 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 716 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 717 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 718 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 719 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 720 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 721 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 722 i2 = 6; 723 idiag += 36; 724 for (i=1; i<m; i++) { 725 v = aa + 36*ai[i]; 726 vi = aj + ai[i]; 727 nz = diag[i] - ai[i]; 728 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 729 while (nz--) { 730 idx = 6*(*vi++); 731 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 732 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 733 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 734 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 735 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 736 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 737 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 738 v += 36; 739 } 740 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 741 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 742 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 743 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 744 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 745 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 746 idiag += 36; 747 i2 += 6; 748 } 749 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 750 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 751 } 752 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 753 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 754 i2 = 0; 755 mdiag = a->idiag+36*a->mbs; 756 for (i=0; i<m; i++) { 757 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 758 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 759 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 760 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 761 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 762 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 763 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 764 mdiag += 36; 765 i2 += 6; 766 } 767 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 768 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 769 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 770 } 771 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 772 idiag = a->idiag+36*a->mbs - 36; 773 i2 = 6*m - 6; 774 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 775 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 776 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 777 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 778 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 779 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 780 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 781 idiag -= 36; 782 i2 -= 6; 783 for (i=m-2; i>=0; i--) { 784 v = aa + 36*(diag[i]+1); 785 vi = aj + diag[i] + 1; 786 nz = ai[i+1] - diag[i] - 1; 787 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 788 while (nz--) { 789 idx = 6*(*vi++); 790 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 791 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 792 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 793 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 794 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 795 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 796 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 797 v += 36; 798 } 799 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 800 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 801 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 802 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 803 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 804 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 805 idiag -= 36; 806 i2 -= 6; 807 } 808 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 809 } 810 } else { 811 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 812 } 813 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 814 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 815 PetscFunctionReturn(0); 816 } 817 818 #undef __FUNCT__ 819 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 820 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 821 { 822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 823 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 824 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 825 const PetscScalar *b; 826 PetscErrorCode ierr; 827 PetscInt m = a->mbs,i,i2,nz,idx; 828 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 829 830 PetscFunctionBegin; 831 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 832 its = its*lits; 833 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 834 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 835 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 836 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 837 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 838 839 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 840 841 diag = a->diag; 842 idiag = a->idiag; 843 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 844 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 845 846 if (flag & SOR_ZERO_INITIAL_GUESS) { 847 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 848 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 849 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 850 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 851 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 852 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 853 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 854 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 855 i2 = 7; 856 idiag += 49; 857 for (i=1; i<m; i++) { 858 v = aa + 49*ai[i]; 859 vi = aj + ai[i]; 860 nz = diag[i] - ai[i]; 861 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 862 while (nz--) { 863 idx = 7*(*vi++); 864 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 865 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 866 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 867 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 868 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 869 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 870 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 871 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 872 v += 49; 873 } 874 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 875 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 876 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 877 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 878 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 879 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 880 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 881 idiag += 49; 882 i2 += 7; 883 } 884 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 885 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 886 } 887 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 888 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 889 i2 = 0; 890 mdiag = a->idiag+49*a->mbs; 891 for (i=0; i<m; i++) { 892 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 893 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 894 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 895 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 896 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 897 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 898 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 899 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 900 mdiag += 36; 901 i2 += 6; 902 } 903 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 904 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 905 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 906 } 907 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 908 idiag = a->idiag+49*a->mbs - 49; 909 i2 = 7*m - 7; 910 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 911 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 912 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 913 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 914 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 915 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 916 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 917 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 918 idiag -= 49; 919 i2 -= 7; 920 for (i=m-2; i>=0; i--) { 921 v = aa + 49*(diag[i]+1); 922 vi = aj + diag[i] + 1; 923 nz = ai[i+1] - diag[i] - 1; 924 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 925 while (nz--) { 926 idx = 7*(*vi++); 927 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 928 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 929 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 930 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 931 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 932 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 933 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 934 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 935 v += 49; 936 } 937 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 938 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 939 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 940 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 941 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 942 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 943 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 944 idiag -= 49; 945 i2 -= 7; 946 } 947 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 948 } 949 } else { 950 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 951 } 952 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 953 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 /* 958 Special version for direct calls from Fortran (Used in PETSc-fun3d) 959 */ 960 #if defined(PETSC_HAVE_FORTRAN_CAPS) 961 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 962 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 963 #define matsetvaluesblocked4_ matsetvaluesblocked4 964 #endif 965 966 EXTERN_C_BEGIN 967 #undef __FUNCT__ 968 #define __FUNCT__ "matsetvaluesblocked4_" 969 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 970 { 971 Mat A = *AA; 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 974 PetscInt *ai=a->i,*ailen=a->ilen; 975 PetscInt *aj=a->j,stepval,lastcol = -1; 976 const PetscScalar *value = v; 977 MatScalar *ap,*aa = a->a,*bap; 978 979 PetscFunctionBegin; 980 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 981 stepval = (n-1)*4; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; 984 rp = aj + ai[row]; 985 ap = aa + 16*ai[row]; 986 nrow = ailen[row]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; 991 if (col <= lastcol) low = 0; else high = nrow; 992 lastcol = col; 993 value = v + k*(stepval+4 + l)*4; 994 while (high-low > 7) { 995 t = (low+high)/2; 996 if (rp[t] > col) high = t; 997 else low = t; 998 } 999 for (i=low; i<high; i++) { 1000 if (rp[i] > col) break; 1001 if (rp[i] == col) { 1002 bap = ap + 16*i; 1003 for (ii=0; ii<4; ii++,value+=stepval) { 1004 for (jj=ii; jj<16; jj+=4) { 1005 bap[jj] += *value++; 1006 } 1007 } 1008 goto noinsert2; 1009 } 1010 } 1011 N = nrow++ - 1; 1012 high++; /* added new column index thus must search to one higher than before */ 1013 /* shift up all the later entries in this row */ 1014 for (ii=N; ii>=i; ii--) { 1015 rp[ii+1] = rp[ii]; 1016 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1017 } 1018 if (N >= i) { 1019 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1020 } 1021 rp[i] = col; 1022 bap = ap + 16*i; 1023 for (ii=0; ii<4; ii++,value+=stepval) { 1024 for (jj=ii; jj<16; jj+=4) { 1025 bap[jj] = *value++; 1026 } 1027 } 1028 noinsert2:; 1029 low = i; 1030 } 1031 ailen[row] = nrow; 1032 } 1033 PetscFunctionReturnVoid(); 1034 } 1035 EXTERN_C_END 1036 1037 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1038 #define matsetvalues4_ MATSETVALUES4 1039 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1040 #define matsetvalues4_ matsetvalues4 1041 #endif 1042 1043 EXTERN_C_BEGIN 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatSetValues4_" 1046 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1047 { 1048 Mat A = *AA; 1049 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1050 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1051 PetscInt *ai=a->i,*ailen=a->ilen; 1052 PetscInt *aj=a->j,brow,bcol; 1053 PetscInt ridx,cidx,lastcol = -1; 1054 MatScalar *ap,value,*aa=a->a,*bap; 1055 1056 PetscFunctionBegin; 1057 for (k=0; k<m; k++) { /* loop over added rows */ 1058 row = im[k]; brow = row/4; 1059 rp = aj + ai[brow]; 1060 ap = aa + 16*ai[brow]; 1061 nrow = ailen[brow]; 1062 low = 0; 1063 high = nrow; 1064 for (l=0; l<n; l++) { /* loop over added columns */ 1065 col = in[l]; bcol = col/4; 1066 ridx = row % 4; cidx = col % 4; 1067 value = v[l + k*n]; 1068 if (col <= lastcol) low = 0; else high = nrow; 1069 lastcol = col; 1070 while (high-low > 7) { 1071 t = (low+high)/2; 1072 if (rp[t] > bcol) high = t; 1073 else low = t; 1074 } 1075 for (i=low; i<high; i++) { 1076 if (rp[i] > bcol) break; 1077 if (rp[i] == bcol) { 1078 bap = ap + 16*i + 4*cidx + ridx; 1079 *bap += value; 1080 goto noinsert1; 1081 } 1082 } 1083 N = nrow++ - 1; 1084 high++; /* added new column thus must search to one higher than before */ 1085 /* shift up all the later entries in this row */ 1086 for (ii=N; ii>=i; ii--) { 1087 rp[ii+1] = rp[ii]; 1088 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1089 } 1090 if (N>=i) { 1091 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1092 } 1093 rp[i] = bcol; 1094 ap[16*i + 4*cidx + ridx] = value; 1095 noinsert1:; 1096 low = i; 1097 } 1098 ailen[brow] = nrow; 1099 } 1100 PetscFunctionReturnVoid(); 1101 } 1102 EXTERN_C_END 1103 1104 /* 1105 Checks for missing diagonals 1106 */ 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1109 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1110 { 1111 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 PetscInt *diag,*jj = a->j,i; 1114 1115 PetscFunctionBegin; 1116 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1117 *missing = PETSC_FALSE; 1118 if (A->rmap->n > 0 && !jj) { 1119 *missing = PETSC_TRUE; 1120 if (d) *d = 0; 1121 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1122 } else { 1123 diag = a->diag; 1124 for (i=0; i<a->mbs; i++) { 1125 if (jj[diag[i]] != i) { 1126 *missing = PETSC_TRUE; 1127 if (d) *d = i; 1128 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1129 } 1130 } 1131 } 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1137 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1138 { 1139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1140 PetscErrorCode ierr; 1141 PetscInt i,j,m = a->mbs; 1142 1143 PetscFunctionBegin; 1144 if (!a->diag) { 1145 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1146 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1147 a->free_diag = PETSC_TRUE; 1148 } 1149 for (i=0; i<m; i++) { 1150 a->diag[i] = a->i[i+1]; 1151 for (j=a->i[i]; j<a->i[i+1]; j++) { 1152 if (a->j[j] == i) { 1153 a->diag[i] = j; 1154 break; 1155 } 1156 } 1157 } 1158 PetscFunctionReturn(0); 1159 } 1160 1161 1162 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1166 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1167 { 1168 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1169 PetscErrorCode ierr; 1170 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1171 PetscInt *tia, *tja; 1172 1173 PetscFunctionBegin; 1174 *nn = n; 1175 if (!ia) PetscFunctionReturn(0); 1176 if (symmetric) { 1177 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1178 } else { 1179 tia = a->i; tja = a->j; 1180 } 1181 1182 if (!blockcompressed && bs > 1) { 1183 (*nn) *= bs; 1184 nbs = bs; 1185 /* malloc & create the natural set of indices */ 1186 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1187 if (n) { 1188 (*ia)[0] = 0; 1189 for (j=1; j<bs; j++) { 1190 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1191 } 1192 } 1193 1194 for (i=1; i<n; i++) { 1195 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1196 for (j=1; j<bs; j++) { 1197 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1198 } 1199 } 1200 if (n) { 1201 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1202 } 1203 1204 if (ja) { 1205 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1206 cnt = 0; 1207 for (i=0; i<n; i++) { 1208 for (j=0; j<bs; j++) { 1209 for (k=tia[i]; k<tia[i+1]; k++) { 1210 for (l=0; l<bs; l++) { 1211 (*ja)[cnt++] = bs*tja[k] + l; 1212 } 1213 } 1214 } 1215 } 1216 } 1217 1218 n *= bs; 1219 nz *= bs*bs; 1220 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1221 ierr = PetscFree(tia);CHKERRQ(ierr); 1222 ierr = PetscFree(tja);CHKERRQ(ierr); 1223 } 1224 } else if (oshift == 1) { 1225 if (symmetric) { 1226 PetscInt nz = tia[A->rmap->n/bs]; 1227 /* add 1 to i and j indices */ 1228 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1229 *ia = tia; 1230 if (ja) { 1231 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1232 *ja = tja; 1233 } 1234 } else { 1235 PetscInt nz = a->i[A->rmap->n/bs]; 1236 /* malloc space and add 1 to i and j indices */ 1237 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1238 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1239 if (ja) { 1240 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1241 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1242 } 1243 } 1244 } else { 1245 *ia = tia; 1246 if (ja) *ja = tja; 1247 } 1248 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1254 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1255 { 1256 PetscErrorCode ierr; 1257 1258 PetscFunctionBegin; 1259 if (!ia) PetscFunctionReturn(0); 1260 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1261 ierr = PetscFree(*ia);CHKERRQ(ierr); 1262 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1263 } 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1269 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1270 { 1271 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 #if defined(PETSC_USE_LOG) 1276 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1277 #endif 1278 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1279 if (a->row) { 1280 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1281 } 1282 if (a->col) { 1283 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1284 } 1285 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1286 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1287 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1288 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1289 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1290 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1291 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1292 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1293 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1294 1295 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1296 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1297 ierr = PetscFree(a);CHKERRQ(ierr); 1298 1299 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1313 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1314 { 1315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 switch (op) { 1320 case MAT_ROW_ORIENTED: 1321 a->roworiented = flg; 1322 break; 1323 case MAT_KEEP_NONZERO_PATTERN: 1324 a->keepnonzeropattern = flg; 1325 break; 1326 case MAT_NEW_NONZERO_LOCATIONS: 1327 a->nonew = (flg ? 0 : 1); 1328 break; 1329 case MAT_NEW_NONZERO_LOCATION_ERR: 1330 a->nonew = (flg ? -1 : 0); 1331 break; 1332 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1333 a->nonew = (flg ? -2 : 0); 1334 break; 1335 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1336 a->nounused = (flg ? -1 : 0); 1337 break; 1338 case MAT_NEW_DIAGONALS: 1339 case MAT_IGNORE_OFF_PROC_ENTRIES: 1340 case MAT_USE_HASH_TABLE: 1341 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1342 break; 1343 case MAT_SYMMETRIC: 1344 case MAT_STRUCTURALLY_SYMMETRIC: 1345 case MAT_HERMITIAN: 1346 case MAT_SYMMETRY_ETERNAL: 1347 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1348 break; 1349 default: 1350 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1357 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1358 { 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1360 PetscErrorCode ierr; 1361 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1362 MatScalar *aa,*aa_i; 1363 PetscScalar *v_i; 1364 1365 PetscFunctionBegin; 1366 bs = A->rmap->bs; 1367 ai = a->i; 1368 aj = a->j; 1369 aa = a->a; 1370 bs2 = a->bs2; 1371 1372 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1373 1374 bn = row/bs; /* Block number */ 1375 bp = row % bs; /* Block Position */ 1376 M = ai[bn+1] - ai[bn]; 1377 *nz = bs*M; 1378 1379 if (v) { 1380 *v = 0; 1381 if (*nz) { 1382 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1383 for (i=0; i<M; i++) { /* for each block in the block row */ 1384 v_i = *v + i*bs; 1385 aa_i = aa + bs2*(ai[bn] + i); 1386 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1387 } 1388 } 1389 } 1390 1391 if (idx) { 1392 *idx = 0; 1393 if (*nz) { 1394 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1395 for (i=0; i<M; i++) { /* for each block in the block row */ 1396 idx_i = *idx + i*bs; 1397 itmp = bs*aj[ai[bn] + i]; 1398 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1399 } 1400 } 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1407 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1408 { 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1413 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1419 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1420 { 1421 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1422 Mat C; 1423 PetscErrorCode ierr; 1424 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1425 PetscInt *rows,*cols,bs2=a->bs2; 1426 MatScalar *array; 1427 1428 PetscFunctionBegin; 1429 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1430 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1431 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1432 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1433 1434 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1435 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1436 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1437 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1438 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1439 ierr = PetscFree(col);CHKERRQ(ierr); 1440 } else { 1441 C = *B; 1442 } 1443 1444 array = a->a; 1445 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1446 cols = rows + bs; 1447 for (i=0; i<mbs; i++) { 1448 cols[0] = i*bs; 1449 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1450 len = ai[i+1] - ai[i]; 1451 for (j=0; j<len; j++) { 1452 rows[0] = (*aj++)*bs; 1453 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1454 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1455 array += bs2; 1456 } 1457 } 1458 ierr = PetscFree(rows);CHKERRQ(ierr); 1459 1460 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1461 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1462 1463 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1464 *B = C; 1465 } else { 1466 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1478 int fd; 1479 PetscScalar *aa; 1480 FILE *file; 1481 1482 PetscFunctionBegin; 1483 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1484 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1485 col_lens[0] = MAT_FILE_COOKIE; 1486 1487 col_lens[1] = A->rmap->N; 1488 col_lens[2] = A->cmap->n; 1489 col_lens[3] = a->nz*bs2; 1490 1491 /* store lengths of each row and write (including header) to file */ 1492 count = 0; 1493 for (i=0; i<a->mbs; i++) { 1494 for (j=0; j<bs; j++) { 1495 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1496 } 1497 } 1498 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1499 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1500 1501 /* store column indices (zero start index) */ 1502 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1503 count = 0; 1504 for (i=0; i<a->mbs; i++) { 1505 for (j=0; j<bs; j++) { 1506 for (k=a->i[i]; k<a->i[i+1]; k++) { 1507 for (l=0; l<bs; l++) { 1508 jj[count++] = bs*a->j[k] + l; 1509 } 1510 } 1511 } 1512 } 1513 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1514 ierr = PetscFree(jj);CHKERRQ(ierr); 1515 1516 /* store nonzero values */ 1517 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1518 count = 0; 1519 for (i=0; i<a->mbs; i++) { 1520 for (j=0; j<bs; j++) { 1521 for (k=a->i[i]; k<a->i[i+1]; k++) { 1522 for (l=0; l<bs; l++) { 1523 aa[count++] = a->a[bs2*k + l*bs + j]; 1524 } 1525 } 1526 } 1527 } 1528 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1529 ierr = PetscFree(aa);CHKERRQ(ierr); 1530 1531 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1532 if (file) { 1533 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1541 { 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1543 PetscErrorCode ierr; 1544 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1545 PetscViewerFormat format; 1546 1547 PetscFunctionBegin; 1548 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1549 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1550 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1551 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1552 Mat aij; 1553 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1554 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1555 ierr = MatDestroy(aij);CHKERRQ(ierr); 1556 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1557 PetscFunctionReturn(0); 1558 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1560 for (i=0; i<a->mbs; i++) { 1561 for (j=0; j<bs; j++) { 1562 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1563 for (k=a->i[i]; k<a->i[i+1]; k++) { 1564 for (l=0; l<bs; l++) { 1565 #if defined(PETSC_USE_COMPLEX) 1566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1568 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1571 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1572 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } 1575 #else 1576 if (a->a[bs2*k + l*bs + j] != 0.0) { 1577 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1578 } 1579 #endif 1580 } 1581 } 1582 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1583 } 1584 } 1585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1586 } else { 1587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1588 for (i=0; i<a->mbs; i++) { 1589 for (j=0; j<bs; j++) { 1590 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1591 for (k=a->i[i]; k<a->i[i+1]; k++) { 1592 for (l=0; l<bs; l++) { 1593 #if defined(PETSC_USE_COMPLEX) 1594 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1595 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1596 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1597 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1598 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1599 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1600 } else { 1601 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1602 } 1603 #else 1604 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1605 #endif 1606 } 1607 } 1608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1609 } 1610 } 1611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1612 } 1613 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1620 { 1621 Mat A = (Mat) Aa; 1622 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1623 PetscErrorCode ierr; 1624 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1625 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1626 MatScalar *aa; 1627 PetscViewer viewer; 1628 1629 PetscFunctionBegin; 1630 1631 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1632 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1633 1634 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1635 1636 /* loop over matrix elements drawing boxes */ 1637 color = PETSC_DRAW_BLUE; 1638 for (i=0,row=0; i<mbs; i++,row+=bs) { 1639 for (j=a->i[i]; j<a->i[i+1]; j++) { 1640 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1641 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1642 aa = a->a + j*bs2; 1643 for (k=0; k<bs; k++) { 1644 for (l=0; l<bs; l++) { 1645 if (PetscRealPart(*aa++) >= 0.) continue; 1646 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1647 } 1648 } 1649 } 1650 } 1651 color = PETSC_DRAW_CYAN; 1652 for (i=0,row=0; i<mbs; i++,row+=bs) { 1653 for (j=a->i[i]; j<a->i[i+1]; j++) { 1654 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1655 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1656 aa = a->a + j*bs2; 1657 for (k=0; k<bs; k++) { 1658 for (l=0; l<bs; l++) { 1659 if (PetscRealPart(*aa++) != 0.) continue; 1660 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1661 } 1662 } 1663 } 1664 } 1665 1666 color = PETSC_DRAW_RED; 1667 for (i=0,row=0; i<mbs; i++,row+=bs) { 1668 for (j=a->i[i]; j<a->i[i+1]; j++) { 1669 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1670 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1671 aa = a->a + j*bs2; 1672 for (k=0; k<bs; k++) { 1673 for (l=0; l<bs; l++) { 1674 if (PetscRealPart(*aa++) <= 0.) continue; 1675 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1676 } 1677 } 1678 } 1679 } 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1686 { 1687 PetscErrorCode ierr; 1688 PetscReal xl,yl,xr,yr,w,h; 1689 PetscDraw draw; 1690 PetscTruth isnull; 1691 1692 PetscFunctionBegin; 1693 1694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1696 1697 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1698 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1699 xr += w; yr += h; xl = -w; yl = -h; 1700 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1701 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1702 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatView_SeqBAIJ" 1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1709 { 1710 PetscErrorCode ierr; 1711 PetscTruth iascii,isbinary,isdraw; 1712 1713 PetscFunctionBegin; 1714 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1717 if (iascii){ 1718 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1719 } else if (isbinary) { 1720 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1721 } else if (isdraw) { 1722 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1723 } else { 1724 Mat B; 1725 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1726 ierr = MatView(B,viewer);CHKERRQ(ierr); 1727 ierr = MatDestroy(B);CHKERRQ(ierr); 1728 } 1729 PetscFunctionReturn(0); 1730 } 1731 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1736 { 1737 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1738 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1739 PetscInt *ai = a->i,*ailen = a->ilen; 1740 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1741 MatScalar *ap,*aa = a->a; 1742 1743 PetscFunctionBegin; 1744 for (k=0; k<m; k++) { /* loop over rows */ 1745 row = im[k]; brow = row/bs; 1746 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1747 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1748 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1749 nrow = ailen[brow]; 1750 for (l=0; l<n; l++) { /* loop over columns */ 1751 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1752 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1753 col = in[l] ; 1754 bcol = col/bs; 1755 cidx = col%bs; 1756 ridx = row%bs; 1757 high = nrow; 1758 low = 0; /* assume unsorted */ 1759 while (high-low > 5) { 1760 t = (low+high)/2; 1761 if (rp[t] > bcol) high = t; 1762 else low = t; 1763 } 1764 for (i=low; i<high; i++) { 1765 if (rp[i] > bcol) break; 1766 if (rp[i] == bcol) { 1767 *v++ = ap[bs2*i+bs*cidx+ridx]; 1768 goto finished; 1769 } 1770 } 1771 *v++ = 0.0; 1772 finished:; 1773 } 1774 } 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #define CHUNKSIZE 10 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1782 { 1783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1784 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1785 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1786 PetscErrorCode ierr; 1787 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1788 PetscTruth roworiented=a->roworiented; 1789 const PetscScalar *value = v; 1790 MatScalar *ap,*aa = a->a,*bap; 1791 1792 PetscFunctionBegin; 1793 if (roworiented) { 1794 stepval = (n-1)*bs; 1795 } else { 1796 stepval = (m-1)*bs; 1797 } 1798 for (k=0; k<m; k++) { /* loop over added rows */ 1799 row = im[k]; 1800 if (row < 0) continue; 1801 #if defined(PETSC_USE_DEBUG) 1802 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1803 #endif 1804 rp = aj + ai[row]; 1805 ap = aa + bs2*ai[row]; 1806 rmax = imax[row]; 1807 nrow = ailen[row]; 1808 low = 0; 1809 high = nrow; 1810 for (l=0; l<n; l++) { /* loop over added columns */ 1811 if (in[l] < 0) continue; 1812 #if defined(PETSC_USE_DEBUG) 1813 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1814 #endif 1815 col = in[l]; 1816 if (roworiented) { 1817 value = v + k*(stepval+bs)*bs + l*bs; 1818 } else { 1819 value = v + l*(stepval+bs)*bs + k*bs; 1820 } 1821 if (col <= lastcol) low = 0; else high = nrow; 1822 lastcol = col; 1823 while (high-low > 7) { 1824 t = (low+high)/2; 1825 if (rp[t] > col) high = t; 1826 else low = t; 1827 } 1828 for (i=low; i<high; i++) { 1829 if (rp[i] > col) break; 1830 if (rp[i] == col) { 1831 bap = ap + bs2*i; 1832 if (roworiented) { 1833 if (is == ADD_VALUES) { 1834 for (ii=0; ii<bs; ii++,value+=stepval) { 1835 for (jj=ii; jj<bs2; jj+=bs) { 1836 bap[jj] += *value++; 1837 } 1838 } 1839 } else { 1840 for (ii=0; ii<bs; ii++,value+=stepval) { 1841 for (jj=ii; jj<bs2; jj+=bs) { 1842 bap[jj] = *value++; 1843 } 1844 } 1845 } 1846 } else { 1847 if (is == ADD_VALUES) { 1848 for (ii=0; ii<bs; ii++,value+=stepval) { 1849 for (jj=0; jj<bs; jj++) { 1850 *bap++ += *value++; 1851 } 1852 } 1853 } else { 1854 for (ii=0; ii<bs; ii++,value+=stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 *bap++ = *value++; 1857 } 1858 } 1859 } 1860 } 1861 goto noinsert2; 1862 } 1863 } 1864 if (nonew == 1) goto noinsert2; 1865 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1866 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1867 N = nrow++ - 1; high++; 1868 /* shift up all the later entries in this row */ 1869 for (ii=N; ii>=i; ii--) { 1870 rp[ii+1] = rp[ii]; 1871 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1872 } 1873 if (N >= i) { 1874 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1875 } 1876 rp[i] = col; 1877 bap = ap + bs2*i; 1878 if (roworiented) { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=ii; jj<bs2; jj+=bs) { 1881 bap[jj] = *value++; 1882 } 1883 } 1884 } else { 1885 for (ii=0; ii<bs; ii++,value+=stepval) { 1886 for (jj=0; jj<bs; jj++) { 1887 *bap++ = *value++; 1888 } 1889 } 1890 } 1891 noinsert2:; 1892 low = i; 1893 } 1894 ailen[row] = nrow; 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1902 { 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1904 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1905 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1906 PetscErrorCode ierr; 1907 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1908 MatScalar *aa = a->a,*ap; 1909 PetscReal ratio=0.6; 1910 1911 PetscFunctionBegin; 1912 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1913 1914 if (m) rmax = ailen[0]; 1915 for (i=1; i<mbs; i++) { 1916 /* move each row back by the amount of empty slots (fshift) before it*/ 1917 fshift += imax[i-1] - ailen[i-1]; 1918 rmax = PetscMax(rmax,ailen[i]); 1919 if (fshift) { 1920 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1921 N = ailen[i]; 1922 for (j=0; j<N; j++) { 1923 ip[j-fshift] = ip[j]; 1924 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1925 } 1926 } 1927 ai[i] = ai[i-1] + ailen[i-1]; 1928 } 1929 if (mbs) { 1930 fshift += imax[mbs-1] - ailen[mbs-1]; 1931 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1932 } 1933 /* reset ilen and imax for each row */ 1934 for (i=0; i<mbs; i++) { 1935 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1936 } 1937 a->nz = ai[mbs]; 1938 1939 /* diagonals may have moved, so kill the diagonal pointers */ 1940 a->idiagvalid = PETSC_FALSE; 1941 if (fshift && a->diag) { 1942 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1944 a->diag = 0; 1945 } 1946 if (fshift && a->nounused == -1) { 1947 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1948 } 1949 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1950 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1952 a->reallocs = 0; 1953 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1954 1955 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1956 if (a->compressedrow.use){ 1957 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1958 } 1959 1960 A->same_nonzero = PETSC_TRUE; 1961 PetscFunctionReturn(0); 1962 } 1963 1964 /* 1965 This function returns an array of flags which indicate the locations of contiguous 1966 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1967 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1968 Assume: sizes should be long enough to hold all the values. 1969 */ 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1973 { 1974 PetscInt i,j,k,row; 1975 PetscTruth flg; 1976 1977 PetscFunctionBegin; 1978 for (i=0,j=0; i<n; j++) { 1979 row = idx[i]; 1980 if (row%bs!=0) { /* Not the begining of a block */ 1981 sizes[j] = 1; 1982 i++; 1983 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1984 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1985 i++; 1986 } else { /* Begining of the block, so check if the complete block exists */ 1987 flg = PETSC_TRUE; 1988 for (k=1; k<bs; k++) { 1989 if (row+k != idx[i+k]) { /* break in the block */ 1990 flg = PETSC_FALSE; 1991 break; 1992 } 1993 } 1994 if (flg) { /* No break in the bs */ 1995 sizes[j] = bs; 1996 i+= bs; 1997 } else { 1998 sizes[j] = 1; 1999 i++; 2000 } 2001 } 2002 } 2003 *bs_max = j; 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2010 { 2011 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2012 PetscErrorCode ierr; 2013 PetscInt i,j,k,count,*rows; 2014 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2015 PetscScalar zero = 0.0; 2016 MatScalar *aa; 2017 2018 PetscFunctionBegin; 2019 /* Make a copy of the IS and sort it */ 2020 /* allocate memory for rows,sizes */ 2021 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2022 sizes = rows + is_n; 2023 2024 /* copy IS values to rows, and sort them */ 2025 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2026 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2027 if (baij->keepnonzeropattern) { 2028 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2029 bs_max = is_n; 2030 A->same_nonzero = PETSC_TRUE; 2031 } else { 2032 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2033 A->same_nonzero = PETSC_FALSE; 2034 } 2035 2036 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2037 row = rows[j]; 2038 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2039 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2040 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2041 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2042 if (diag != 0.0) { 2043 if (baij->ilen[row/bs] > 0) { 2044 baij->ilen[row/bs] = 1; 2045 baij->j[baij->i[row/bs]] = row/bs; 2046 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2047 } 2048 /* Now insert all the diagonal values for this bs */ 2049 for (k=0; k<bs; k++) { 2050 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 } else { /* (diag == 0.0) */ 2053 baij->ilen[row/bs] = 0; 2054 } /* end (diag == 0.0) */ 2055 } else { /* (sizes[i] != bs) */ 2056 #if defined (PETSC_USE_DEBUG) 2057 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2058 #endif 2059 for (k=0; k<count; k++) { 2060 aa[0] = zero; 2061 aa += bs; 2062 } 2063 if (diag != 0.0) { 2064 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2065 } 2066 } 2067 } 2068 2069 ierr = PetscFree(rows);CHKERRQ(ierr); 2070 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2076 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2077 { 2078 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2079 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2080 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2081 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2082 PetscErrorCode ierr; 2083 PetscInt ridx,cidx,bs2=a->bs2; 2084 PetscTruth roworiented=a->roworiented; 2085 MatScalar *ap,value,*aa=a->a,*bap; 2086 2087 PetscFunctionBegin; 2088 for (k=0; k<m; k++) { /* loop over added rows */ 2089 row = im[k]; 2090 brow = row/bs; 2091 if (row < 0) continue; 2092 #if defined(PETSC_USE_DEBUG) 2093 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2094 #endif 2095 rp = aj + ai[brow]; 2096 ap = aa + bs2*ai[brow]; 2097 rmax = imax[brow]; 2098 nrow = ailen[brow]; 2099 low = 0; 2100 high = nrow; 2101 for (l=0; l<n; l++) { /* loop over added columns */ 2102 if (in[l] < 0) continue; 2103 #if defined(PETSC_USE_DEBUG) 2104 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2105 #endif 2106 col = in[l]; bcol = col/bs; 2107 ridx = row % bs; cidx = col % bs; 2108 if (roworiented) { 2109 value = v[l + k*n]; 2110 } else { 2111 value = v[k + l*m]; 2112 } 2113 if (col <= lastcol) low = 0; else high = nrow; 2114 lastcol = col; 2115 while (high-low > 7) { 2116 t = (low+high)/2; 2117 if (rp[t] > bcol) high = t; 2118 else low = t; 2119 } 2120 for (i=low; i<high; i++) { 2121 if (rp[i] > bcol) break; 2122 if (rp[i] == bcol) { 2123 bap = ap + bs2*i + bs*cidx + ridx; 2124 if (is == ADD_VALUES) *bap += value; 2125 else *bap = value; 2126 goto noinsert1; 2127 } 2128 } 2129 if (nonew == 1) goto noinsert1; 2130 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2131 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2132 N = nrow++ - 1; high++; 2133 /* shift up all the later entries in this row */ 2134 for (ii=N; ii>=i; ii--) { 2135 rp[ii+1] = rp[ii]; 2136 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2137 } 2138 if (N>=i) { 2139 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2140 } 2141 rp[i] = bcol; 2142 ap[bs2*i + bs*cidx + ridx] = value; 2143 a->nz++; 2144 noinsert1:; 2145 low = i; 2146 } 2147 ailen[brow] = nrow; 2148 } 2149 A->same_nonzero = PETSC_FALSE; 2150 PetscFunctionReturn(0); 2151 } 2152 2153 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2157 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2158 { 2159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2160 Mat outA; 2161 PetscErrorCode ierr; 2162 PetscTruth row_identity,col_identity; 2163 2164 PetscFunctionBegin; 2165 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2166 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2167 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2168 if (!row_identity || !col_identity) { 2169 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2170 } 2171 2172 outA = inA; 2173 inA->factor = MAT_FACTOR_LU; 2174 2175 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2176 2177 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2178 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2179 a->row = row; 2180 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2181 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2182 a->col = col; 2183 2184 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2185 if (a->icol) { 2186 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2187 } 2188 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2189 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2190 2191 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2192 if (!a->solve_work) { 2193 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2194 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2195 } 2196 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2197 2198 PetscFunctionReturn(0); 2199 } 2200 2201 EXTERN_C_BEGIN 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2204 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2205 { 2206 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2207 PetscInt i,nz,mbs; 2208 2209 PetscFunctionBegin; 2210 nz = baij->maxnz/baij->bs2; 2211 mbs = baij->mbs; 2212 for (i=0; i<nz; i++) { 2213 baij->j[i] = indices[i]; 2214 } 2215 baij->nz = nz; 2216 for (i=0; i<mbs; i++) { 2217 baij->ilen[i] = baij->imax[i]; 2218 } 2219 PetscFunctionReturn(0); 2220 } 2221 EXTERN_C_END 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2225 /*@ 2226 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2227 in the matrix. 2228 2229 Input Parameters: 2230 + mat - the SeqBAIJ matrix 2231 - indices - the column indices 2232 2233 Level: advanced 2234 2235 Notes: 2236 This can be called if you have precomputed the nonzero structure of the 2237 matrix and want to provide it to the matrix object to improve the performance 2238 of the MatSetValues() operation. 2239 2240 You MUST have set the correct numbers of nonzeros per row in the call to 2241 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2242 2243 MUST be called before any calls to MatSetValues(); 2244 2245 @*/ 2246 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2247 { 2248 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2249 2250 PetscFunctionBegin; 2251 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2252 PetscValidPointer(indices,2); 2253 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2254 if (f) { 2255 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2256 } else { 2257 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2264 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2265 { 2266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2267 PetscErrorCode ierr; 2268 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2269 PetscReal atmp; 2270 PetscScalar *x,zero = 0.0; 2271 MatScalar *aa; 2272 PetscInt ncols,brow,krow,kcol; 2273 2274 PetscFunctionBegin; 2275 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2276 bs = A->rmap->bs; 2277 aa = a->a; 2278 ai = a->i; 2279 aj = a->j; 2280 mbs = a->mbs; 2281 2282 ierr = VecSet(v,zero);CHKERRQ(ierr); 2283 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2284 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2285 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2286 for (i=0; i<mbs; i++) { 2287 ncols = ai[1] - ai[0]; ai++; 2288 brow = bs*i; 2289 for (j=0; j<ncols; j++){ 2290 for (kcol=0; kcol<bs; kcol++){ 2291 for (krow=0; krow<bs; krow++){ 2292 atmp = PetscAbsScalar(*aa);aa++; 2293 row = brow + krow; /* row index */ 2294 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2295 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2296 } 2297 } 2298 aj++; 2299 } 2300 } 2301 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatCopy_SeqBAIJ" 2307 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2308 { 2309 PetscErrorCode ierr; 2310 2311 PetscFunctionBegin; 2312 /* If the two matrices have the same copy implementation, use fast copy. */ 2313 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2315 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2316 2317 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2318 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2319 } 2320 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2321 } else { 2322 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2323 } 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2329 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2330 { 2331 PetscErrorCode ierr; 2332 2333 PetscFunctionBegin; 2334 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2340 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2341 { 2342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2343 PetscFunctionBegin; 2344 *array = a->a; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2350 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2351 { 2352 PetscFunctionBegin; 2353 PetscFunctionReturn(0); 2354 } 2355 2356 #include "petscblaslapack.h" 2357 #undef __FUNCT__ 2358 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2359 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2360 { 2361 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2362 PetscErrorCode ierr; 2363 PetscInt i,bs=Y->rmap->bs,j,bs2; 2364 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2365 2366 PetscFunctionBegin; 2367 if (str == SAME_NONZERO_PATTERN) { 2368 PetscScalar alpha = a; 2369 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2370 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2371 if (y->xtoy && y->XtoY != X) { 2372 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2373 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2374 } 2375 if (!y->xtoy) { /* get xtoy */ 2376 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2377 y->XtoY = X; 2378 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2379 } 2380 bs2 = bs*bs; 2381 for (i=0; i<x->nz; i++) { 2382 j = 0; 2383 while (j < bs2){ 2384 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2385 j++; 2386 } 2387 } 2388 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2389 } else { 2390 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2391 } 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2397 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2398 { 2399 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2400 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2401 MatScalar *aa = a->a; 2402 2403 PetscFunctionBegin; 2404 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2410 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2411 { 2412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2413 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2414 MatScalar *aa = a->a; 2415 2416 PetscFunctionBegin; 2417 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2425 /* 2426 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2427 */ 2428 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2429 { 2430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2431 PetscErrorCode ierr; 2432 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2433 PetscInt nz = a->i[m],row,*jj,mr,col; 2434 2435 PetscFunctionBegin; 2436 *nn = n; 2437 if (!ia) PetscFunctionReturn(0); 2438 if (symmetric) { 2439 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2440 } else { 2441 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2442 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2443 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2444 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2445 jj = a->j; 2446 for (i=0; i<nz; i++) { 2447 collengths[jj[i]]++; 2448 } 2449 cia[0] = oshift; 2450 for (i=0; i<n; i++) { 2451 cia[i+1] = cia[i] + collengths[i]; 2452 } 2453 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2454 jj = a->j; 2455 for (row=0; row<m; row++) { 2456 mr = a->i[row+1] - a->i[row]; 2457 for (i=0; i<mr; i++) { 2458 col = *jj++; 2459 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2460 } 2461 } 2462 ierr = PetscFree(collengths);CHKERRQ(ierr); 2463 *ia = cia; *ja = cja; 2464 } 2465 PetscFunctionReturn(0); 2466 } 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2470 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2471 { 2472 PetscErrorCode ierr; 2473 2474 PetscFunctionBegin; 2475 if (!ia) PetscFunctionReturn(0); 2476 ierr = PetscFree(*ia);CHKERRQ(ierr); 2477 ierr = PetscFree(*ja);CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2483 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2484 { 2485 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2486 PetscErrorCode ierr; 2487 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2488 PetscScalar dx,*y,*xx,*w3_array; 2489 PetscScalar *vscale_array; 2490 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2491 Vec w1=coloring->w1,w2=coloring->w2,w3; 2492 void *fctx = coloring->fctx; 2493 PetscTruth flg = PETSC_FALSE; 2494 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2495 Vec x1_tmp; 2496 2497 PetscFunctionBegin; 2498 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2499 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2500 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2501 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2502 2503 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2504 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2505 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2506 if (flg) { 2507 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2508 } else { 2509 PetscTruth assembled; 2510 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2511 if (assembled) { 2512 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2513 } 2514 } 2515 2516 x1_tmp = x1; 2517 if (!coloring->vscale){ 2518 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2519 } 2520 2521 /* 2522 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2523 coloring->F for the coarser grids from the finest 2524 */ 2525 if (coloring->F) { 2526 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2527 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2528 if (m1 != m2) { 2529 coloring->F = 0; 2530 } 2531 } 2532 2533 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2534 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2535 } 2536 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2537 2538 /* Set w1 = F(x1) */ 2539 if (coloring->F) { 2540 w1 = coloring->F; /* use already computed value of function */ 2541 coloring->F = 0; 2542 } else { 2543 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2544 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2545 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2546 } 2547 2548 if (!coloring->w3) { 2549 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2550 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2551 } 2552 w3 = coloring->w3; 2553 2554 CHKMEMQ; 2555 /* Compute all the local scale factors, including ghost points */ 2556 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2557 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2558 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2559 if (ctype == IS_COLORING_GHOSTED){ 2560 col_start = 0; col_end = N; 2561 } else if (ctype == IS_COLORING_GLOBAL){ 2562 xx = xx - start; 2563 vscale_array = vscale_array - start; 2564 col_start = start; col_end = N + start; 2565 } CHKMEMQ; 2566 for (col=col_start; col<col_end; col++){ 2567 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2568 if (coloring->htype[0] == 'w') { 2569 dx = 1.0 + unorm; 2570 } else { 2571 dx = xx[col]; 2572 } 2573 if (dx == 0.0) dx = 1.0; 2574 #if !defined(PETSC_USE_COMPLEX) 2575 if (dx < umin && dx >= 0.0) dx = umin; 2576 else if (dx < 0.0 && dx > -umin) dx = -umin; 2577 #else 2578 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2579 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2580 #endif 2581 dx *= epsilon; 2582 vscale_array[col] = 1.0/dx; 2583 } CHKMEMQ; 2584 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2585 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2586 if (ctype == IS_COLORING_GLOBAL){ 2587 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2588 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2589 } 2590 CHKMEMQ; 2591 if (coloring->vscaleforrow) { 2592 vscaleforrow = coloring->vscaleforrow; 2593 } else { 2594 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2595 } 2596 2597 2598 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2599 /* 2600 Loop over each color 2601 */ 2602 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2603 for (k=0; k<coloring->ncolors; k++) { 2604 coloring->currentcolor = k; 2605 for (i=0; i<bs; i++) { 2606 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2607 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2608 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2609 /* 2610 Loop over each column associated with color 2611 adding the perturbation to the vector w3. 2612 */ 2613 for (l=0; l<coloring->ncolumns[k]; l++) { 2614 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2615 if (coloring->htype[0] == 'w') { 2616 dx = 1.0 + unorm; 2617 } else { 2618 dx = xx[col]; 2619 } 2620 if (dx == 0.0) dx = 1.0; 2621 #if !defined(PETSC_USE_COMPLEX) 2622 if (dx < umin && dx >= 0.0) dx = umin; 2623 else if (dx < 0.0 && dx > -umin) dx = -umin; 2624 #else 2625 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2626 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2627 #endif 2628 dx *= epsilon; 2629 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2630 w3_array[col] += dx; 2631 } 2632 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2633 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2634 2635 /* 2636 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2637 w2 = F(x1 + dx) - F(x1) 2638 */ 2639 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2640 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2641 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2642 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2643 2644 /* 2645 Loop over rows of vector, putting results into Jacobian matrix 2646 */ 2647 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2648 for (l=0; l<coloring->nrows[k]; l++) { 2649 row = bs*coloring->rows[k][l]; /* local row index */ 2650 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2651 for (j=0; j<bs; j++) { 2652 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2653 srows[j] = row + start + j; 2654 } 2655 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2656 } 2657 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2658 } 2659 } /* endof for each color */ 2660 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2661 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2662 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2663 ierr = PetscFree(srows);CHKERRQ(ierr); 2664 2665 coloring->currentcolor = -1; 2666 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2667 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2668 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 2672 /* -------------------------------------------------------------------*/ 2673 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2674 MatGetRow_SeqBAIJ, 2675 MatRestoreRow_SeqBAIJ, 2676 MatMult_SeqBAIJ_N, 2677 /* 4*/ MatMultAdd_SeqBAIJ_N, 2678 MatMultTranspose_SeqBAIJ, 2679 MatMultTransposeAdd_SeqBAIJ, 2680 0, 2681 0, 2682 0, 2683 /*10*/ 0, 2684 MatLUFactor_SeqBAIJ, 2685 0, 2686 0, 2687 MatTranspose_SeqBAIJ, 2688 /*15*/ MatGetInfo_SeqBAIJ, 2689 MatEqual_SeqBAIJ, 2690 MatGetDiagonal_SeqBAIJ, 2691 MatDiagonalScale_SeqBAIJ, 2692 MatNorm_SeqBAIJ, 2693 /*20*/ 0, 2694 MatAssemblyEnd_SeqBAIJ, 2695 MatSetOption_SeqBAIJ, 2696 MatZeroEntries_SeqBAIJ, 2697 /*24*/ MatZeroRows_SeqBAIJ, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2703 0, 2704 0, 2705 MatGetArray_SeqBAIJ, 2706 MatRestoreArray_SeqBAIJ, 2707 /*34*/ MatDuplicate_SeqBAIJ, 2708 0, 2709 0, 2710 MatILUFactor_SeqBAIJ, 2711 0, 2712 /*39*/ MatAXPY_SeqBAIJ, 2713 MatGetSubMatrices_SeqBAIJ, 2714 MatIncreaseOverlap_SeqBAIJ, 2715 MatGetValues_SeqBAIJ, 2716 MatCopy_SeqBAIJ, 2717 /*44*/ 0, 2718 MatScale_SeqBAIJ, 2719 0, 2720 0, 2721 MatILUDTFactor_SeqBAIJ, 2722 /*49*/ 0, 2723 MatGetRowIJ_SeqBAIJ, 2724 MatRestoreRowIJ_SeqBAIJ, 2725 MatGetColumnIJ_SeqBAIJ, 2726 MatRestoreColumnIJ_SeqBAIJ, 2727 /*54*/ MatFDColoringCreate_SeqAIJ, 2728 0, 2729 0, 2730 0, 2731 MatSetValuesBlocked_SeqBAIJ, 2732 /*59*/ MatGetSubMatrix_SeqBAIJ, 2733 MatDestroy_SeqBAIJ, 2734 MatView_SeqBAIJ, 2735 0, 2736 0, 2737 /*64*/ 0, 2738 0, 2739 0, 2740 0, 2741 0, 2742 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2743 0, 2744 MatConvert_Basic, 2745 0, 2746 0, 2747 /*74*/ 0, 2748 MatFDColoringApply_BAIJ, 2749 0, 2750 0, 2751 0, 2752 /*79*/ 0, 2753 0, 2754 0, 2755 0, 2756 MatLoad_SeqBAIJ, 2757 /*84*/ 0, 2758 0, 2759 0, 2760 0, 2761 0, 2762 /*89*/ 0, 2763 0, 2764 0, 2765 0, 2766 0, 2767 /*94*/ 0, 2768 0, 2769 0, 2770 0, 2771 0, 2772 /*99*/0, 2773 0, 2774 0, 2775 0, 2776 0, 2777 /*104*/0, 2778 MatRealPart_SeqBAIJ, 2779 MatImaginaryPart_SeqBAIJ, 2780 0, 2781 0, 2782 /*109*/0, 2783 0, 2784 0, 2785 0, 2786 MatMissingDiagonal_SeqBAIJ, 2787 /*114*/0, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*119*/0, 2793 0, 2794 MatMultHermitianTranspose_SeqBAIJ, 2795 MatMultHermitianTransposeAdd_SeqBAIJ 2796 }; 2797 2798 EXTERN_C_BEGIN 2799 #undef __FUNCT__ 2800 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2801 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2802 { 2803 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2804 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2805 PetscErrorCode ierr; 2806 2807 PetscFunctionBegin; 2808 if (aij->nonew != 1) { 2809 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2810 } 2811 2812 /* allocate space for values if not already there */ 2813 if (!aij->saved_values) { 2814 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2815 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2816 } 2817 2818 /* copy values over */ 2819 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2820 PetscFunctionReturn(0); 2821 } 2822 EXTERN_C_END 2823 2824 EXTERN_C_BEGIN 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2827 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2828 { 2829 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2830 PetscErrorCode ierr; 2831 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2832 2833 PetscFunctionBegin; 2834 if (aij->nonew != 1) { 2835 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2836 } 2837 if (!aij->saved_values) { 2838 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2839 } 2840 2841 /* copy values over */ 2842 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2843 PetscFunctionReturn(0); 2844 } 2845 EXTERN_C_END 2846 2847 EXTERN_C_BEGIN 2848 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2849 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2850 EXTERN_C_END 2851 2852 EXTERN_C_BEGIN 2853 #undef __FUNCT__ 2854 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2855 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2856 { 2857 Mat_SeqBAIJ *b; 2858 PetscErrorCode ierr; 2859 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2860 PetscTruth flg,skipallocation = PETSC_FALSE; 2861 2862 PetscFunctionBegin; 2863 2864 if (nz == MAT_SKIP_ALLOCATION) { 2865 skipallocation = PETSC_TRUE; 2866 nz = 0; 2867 } 2868 2869 if (bs < 0) { 2870 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2871 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2872 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2873 bs = PetscAbs(bs); 2874 } 2875 if (nnz && newbs != bs) { 2876 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2877 } 2878 bs = newbs; 2879 2880 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2881 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2882 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2883 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2884 2885 B->preallocated = PETSC_TRUE; 2886 2887 mbs = B->rmap->n/bs; 2888 nbs = B->cmap->n/bs; 2889 bs2 = bs*bs; 2890 2891 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2892 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2893 } 2894 2895 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2896 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2897 if (nnz) { 2898 for (i=0; i<mbs; i++) { 2899 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2900 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2901 } 2902 } 2903 2904 b = (Mat_SeqBAIJ*)B->data; 2905 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2906 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2907 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2908 2909 if (!flg) { 2910 switch (bs) { 2911 case 1: 2912 B->ops->mult = MatMult_SeqBAIJ_1; 2913 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2914 B->ops->sor = MatSOR_SeqBAIJ_1; 2915 break; 2916 case 2: 2917 B->ops->mult = MatMult_SeqBAIJ_2; 2918 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2919 B->ops->sor = MatSOR_SeqBAIJ_2; 2920 break; 2921 case 3: 2922 B->ops->mult = MatMult_SeqBAIJ_3; 2923 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2924 B->ops->sor = MatSOR_SeqBAIJ_3; 2925 break; 2926 case 4: 2927 B->ops->mult = MatMult_SeqBAIJ_4; 2928 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2929 B->ops->sor = MatSOR_SeqBAIJ_4; 2930 break; 2931 case 5: 2932 B->ops->mult = MatMult_SeqBAIJ_5; 2933 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2934 B->ops->sor = MatSOR_SeqBAIJ_5; 2935 break; 2936 case 6: 2937 B->ops->mult = MatMult_SeqBAIJ_6; 2938 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2939 B->ops->sor = MatSOR_SeqBAIJ_6; 2940 break; 2941 case 7: 2942 B->ops->mult = MatMult_SeqBAIJ_7; 2943 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2944 B->ops->sor = MatSOR_SeqBAIJ_7; 2945 break; 2946 default: 2947 B->ops->mult = MatMult_SeqBAIJ_N; 2948 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2949 break; 2950 } 2951 } 2952 B->rmap->bs = bs; 2953 b->mbs = mbs; 2954 b->nbs = nbs; 2955 if (!skipallocation) { 2956 if (!b->imax) { 2957 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2958 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2959 b->free_imax_ilen = PETSC_TRUE; 2960 } 2961 /* b->ilen will count nonzeros in each block row so far. */ 2962 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2963 if (!nnz) { 2964 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2965 else if (nz <= 0) nz = 1; 2966 for (i=0; i<mbs; i++) b->imax[i] = nz; 2967 nz = nz*mbs; 2968 } else { 2969 nz = 0; 2970 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2971 } 2972 2973 /* allocate the matrix space */ 2974 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2975 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2976 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2977 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2978 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2979 b->singlemalloc = PETSC_TRUE; 2980 b->i[0] = 0; 2981 for (i=1; i<mbs+1; i++) { 2982 b->i[i] = b->i[i-1] + b->imax[i-1]; 2983 } 2984 b->free_a = PETSC_TRUE; 2985 b->free_ij = PETSC_TRUE; 2986 } else { 2987 b->free_a = PETSC_FALSE; 2988 b->free_ij = PETSC_FALSE; 2989 } 2990 2991 B->rmap->bs = bs; 2992 b->bs2 = bs2; 2993 b->mbs = mbs; 2994 b->nz = 0; 2995 b->maxnz = nz*bs2; 2996 B->info.nz_unneeded = (PetscReal)b->maxnz; 2997 PetscFunctionReturn(0); 2998 } 2999 EXTERN_C_END 3000 3001 EXTERN_C_BEGIN 3002 #undef __FUNCT__ 3003 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3004 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3005 { 3006 PetscInt i,m,nz,nz_max=0,*nnz; 3007 PetscScalar *values=0; 3008 PetscErrorCode ierr; 3009 3010 PetscFunctionBegin; 3011 3012 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3013 3014 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3015 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3016 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3017 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3018 m = B->rmap->n/bs; 3019 3020 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3021 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3022 for(i=0; i<m; i++) { 3023 nz = ii[i+1]- ii[i]; 3024 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3025 nz_max = PetscMax(nz_max, nz); 3026 nnz[i] = nz; 3027 } 3028 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3029 ierr = PetscFree(nnz);CHKERRQ(ierr); 3030 3031 values = (PetscScalar*)V; 3032 if (!values) { 3033 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3034 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3035 } 3036 for (i=0; i<m; i++) { 3037 PetscInt ncols = ii[i+1] - ii[i]; 3038 const PetscInt *icols = jj + ii[i]; 3039 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3040 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3041 } 3042 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3043 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3044 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3045 3046 PetscFunctionReturn(0); 3047 } 3048 EXTERN_C_END 3049 3050 3051 EXTERN_C_BEGIN 3052 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3053 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3054 EXTERN_C_END 3055 3056 /*MC 3057 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3058 block sparse compressed row format. 3059 3060 Options Database Keys: 3061 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3062 3063 Level: beginner 3064 3065 .seealso: MatCreateSeqBAIJ() 3066 M*/ 3067 3068 3069 EXTERN_C_BEGIN 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatCreate_SeqBAIJ" 3072 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3073 { 3074 PetscErrorCode ierr; 3075 PetscMPIInt size; 3076 Mat_SeqBAIJ *b; 3077 3078 PetscFunctionBegin; 3079 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3080 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3081 3082 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3083 B->data = (void*)b; 3084 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3085 B->mapping = 0; 3086 b->row = 0; 3087 b->col = 0; 3088 b->icol = 0; 3089 b->reallocs = 0; 3090 b->saved_values = 0; 3091 3092 b->roworiented = PETSC_TRUE; 3093 b->nonew = 0; 3094 b->diag = 0; 3095 b->solve_work = 0; 3096 b->mult_work = 0; 3097 B->spptr = 0; 3098 B->info.nz_unneeded = (PetscReal)b->maxnz; 3099 b->keepnonzeropattern = PETSC_FALSE; 3100 b->xtoy = 0; 3101 b->XtoY = 0; 3102 b->compressedrow.use = PETSC_FALSE; 3103 b->compressedrow.nrows = 0; 3104 b->compressedrow.i = PETSC_NULL; 3105 b->compressedrow.rindex = PETSC_NULL; 3106 b->compressedrow.checked = PETSC_FALSE; 3107 B->same_nonzero = PETSC_FALSE; 3108 3109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3110 "MatGetFactorAvailable_seqbaij_petsc", 3111 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3113 "MatGetFactor_seqbaij_petsc", 3114 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3116 "MatInvertBlockDiagonal_SeqBAIJ", 3117 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3118 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3119 "MatStoreValues_SeqBAIJ", 3120 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3121 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3122 "MatRetrieveValues_SeqBAIJ", 3123 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3124 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3125 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3126 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3127 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3128 "MatConvert_SeqBAIJ_SeqAIJ", 3129 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3130 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3131 "MatConvert_SeqBAIJ_SeqSBAIJ", 3132 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3133 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3134 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3135 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3136 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3137 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3138 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3139 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3140 PetscFunctionReturn(0); 3141 } 3142 EXTERN_C_END 3143 3144 #undef __FUNCT__ 3145 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3146 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3147 { 3148 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3149 PetscErrorCode ierr; 3150 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3151 3152 PetscFunctionBegin; 3153 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3154 3155 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3156 c->imax = a->imax; 3157 c->ilen = a->ilen; 3158 c->free_imax_ilen = PETSC_FALSE; 3159 } else { 3160 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3161 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3162 for (i=0; i<mbs; i++) { 3163 c->imax[i] = a->imax[i]; 3164 c->ilen[i] = a->ilen[i]; 3165 } 3166 c->free_imax_ilen = PETSC_TRUE; 3167 } 3168 3169 /* allocate the matrix space */ 3170 if (mallocmatspace){ 3171 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3172 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3173 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3174 c->singlemalloc = PETSC_FALSE; 3175 c->free_ij = PETSC_FALSE; 3176 c->i = a->i; 3177 c->j = a->j; 3178 c->parent = A; 3179 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3180 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3181 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3182 } else { 3183 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3184 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3185 c->singlemalloc = PETSC_TRUE; 3186 c->free_ij = PETSC_TRUE; 3187 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3188 if (mbs > 0) { 3189 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3190 if (cpvalues == MAT_COPY_VALUES) { 3191 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3192 } else { 3193 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3194 } 3195 } 3196 } 3197 } 3198 3199 c->roworiented = a->roworiented; 3200 c->nonew = a->nonew; 3201 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3202 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3203 c->bs2 = a->bs2; 3204 c->mbs = a->mbs; 3205 c->nbs = a->nbs; 3206 3207 if (a->diag) { 3208 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3209 c->diag = a->diag; 3210 c->free_diag = PETSC_FALSE; 3211 } else { 3212 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3213 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3214 for (i=0; i<mbs; i++) { 3215 c->diag[i] = a->diag[i]; 3216 } 3217 c->free_diag = PETSC_TRUE; 3218 } 3219 } else c->diag = 0; 3220 c->nz = a->nz; 3221 c->maxnz = a->maxnz; 3222 c->solve_work = 0; 3223 c->mult_work = 0; 3224 c->free_a = PETSC_TRUE; 3225 c->free_ij = PETSC_TRUE; 3226 C->preallocated = PETSC_TRUE; 3227 C->assembled = PETSC_TRUE; 3228 3229 c->compressedrow.use = a->compressedrow.use; 3230 c->compressedrow.nrows = a->compressedrow.nrows; 3231 c->compressedrow.checked = a->compressedrow.checked; 3232 if ( a->compressedrow.checked && a->compressedrow.use){ 3233 i = a->compressedrow.nrows; 3234 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 3235 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3236 c->compressedrow.rindex = c->compressedrow.i + i + 1; 3237 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3238 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3239 } else { 3240 c->compressedrow.use = PETSC_FALSE; 3241 c->compressedrow.i = PETSC_NULL; 3242 c->compressedrow.rindex = PETSC_NULL; 3243 } 3244 C->same_nonzero = A->same_nonzero; 3245 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3246 PetscFunctionReturn(0); 3247 } 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3251 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3252 { 3253 PetscErrorCode ierr; 3254 3255 PetscFunctionBegin; 3256 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3257 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3258 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3259 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3260 PetscFunctionReturn(0); 3261 } 3262 3263 #undef __FUNCT__ 3264 #define __FUNCT__ "MatLoad_SeqBAIJ" 3265 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3266 { 3267 Mat_SeqBAIJ *a; 3268 Mat B; 3269 PetscErrorCode ierr; 3270 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3271 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3272 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3273 PetscInt *masked,nmask,tmp,bs2,ishift; 3274 PetscMPIInt size; 3275 int fd; 3276 PetscScalar *aa; 3277 MPI_Comm comm = ((PetscObject)viewer)->comm; 3278 3279 PetscFunctionBegin; 3280 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3281 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3282 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3283 bs2 = bs*bs; 3284 3285 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3286 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3287 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3288 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3289 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3290 M = header[1]; N = header[2]; nz = header[3]; 3291 3292 if (header[3] < 0) { 3293 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3294 } 3295 3296 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3297 3298 /* 3299 This code adds extra rows to make sure the number of rows is 3300 divisible by the blocksize 3301 */ 3302 mbs = M/bs; 3303 extra_rows = bs - M + bs*(mbs); 3304 if (extra_rows == bs) extra_rows = 0; 3305 else mbs++; 3306 if (extra_rows) { 3307 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3308 } 3309 3310 /* read in row lengths */ 3311 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3312 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3313 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3314 3315 /* read in column indices */ 3316 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3317 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3318 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3319 3320 /* loop over row lengths determining block row lengths */ 3321 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3322 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3323 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3324 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3325 masked = mask + mbs; 3326 rowcount = 0; nzcount = 0; 3327 for (i=0; i<mbs; i++) { 3328 nmask = 0; 3329 for (j=0; j<bs; j++) { 3330 kmax = rowlengths[rowcount]; 3331 for (k=0; k<kmax; k++) { 3332 tmp = jj[nzcount++]/bs; 3333 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3334 } 3335 rowcount++; 3336 } 3337 browlengths[i] += nmask; 3338 /* zero out the mask elements we set */ 3339 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3340 } 3341 3342 /* create our matrix */ 3343 ierr = MatCreate(comm,&B); 3344 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3345 ierr = MatSetType(B,type);CHKERRQ(ierr); 3346 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3347 a = (Mat_SeqBAIJ*)B->data; 3348 3349 /* set matrix "i" values */ 3350 a->i[0] = 0; 3351 for (i=1; i<= mbs; i++) { 3352 a->i[i] = a->i[i-1] + browlengths[i-1]; 3353 a->ilen[i-1] = browlengths[i-1]; 3354 } 3355 a->nz = 0; 3356 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3357 3358 /* read in nonzero values */ 3359 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3360 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3361 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3362 3363 /* set "a" and "j" values into matrix */ 3364 nzcount = 0; jcount = 0; 3365 for (i=0; i<mbs; i++) { 3366 nzcountb = nzcount; 3367 nmask = 0; 3368 for (j=0; j<bs; j++) { 3369 kmax = rowlengths[i*bs+j]; 3370 for (k=0; k<kmax; k++) { 3371 tmp = jj[nzcount++]/bs; 3372 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3373 } 3374 } 3375 /* sort the masked values */ 3376 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3377 3378 /* set "j" values into matrix */ 3379 maskcount = 1; 3380 for (j=0; j<nmask; j++) { 3381 a->j[jcount++] = masked[j]; 3382 mask[masked[j]] = maskcount++; 3383 } 3384 /* set "a" values into matrix */ 3385 ishift = bs2*a->i[i]; 3386 for (j=0; j<bs; j++) { 3387 kmax = rowlengths[i*bs+j]; 3388 for (k=0; k<kmax; k++) { 3389 tmp = jj[nzcountb]/bs ; 3390 block = mask[tmp] - 1; 3391 point = jj[nzcountb] - bs*tmp; 3392 idx = ishift + bs2*block + j + bs*point; 3393 a->a[idx] = (MatScalar)aa[nzcountb++]; 3394 } 3395 } 3396 /* zero out the mask elements we set */ 3397 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3398 } 3399 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3400 3401 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3402 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3403 ierr = PetscFree(aa);CHKERRQ(ierr); 3404 ierr = PetscFree(jj);CHKERRQ(ierr); 3405 ierr = PetscFree(mask);CHKERRQ(ierr); 3406 3407 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3408 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3409 ierr = MatView_Private(B);CHKERRQ(ierr); 3410 3411 *A = B; 3412 PetscFunctionReturn(0); 3413 } 3414 3415 #undef __FUNCT__ 3416 #define __FUNCT__ "MatCreateSeqBAIJ" 3417 /*@C 3418 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3419 compressed row) format. For good matrix assembly performance the 3420 user should preallocate the matrix storage by setting the parameter nz 3421 (or the array nnz). By setting these parameters accurately, performance 3422 during matrix assembly can be increased by more than a factor of 50. 3423 3424 Collective on MPI_Comm 3425 3426 Input Parameters: 3427 + comm - MPI communicator, set to PETSC_COMM_SELF 3428 . bs - size of block 3429 . m - number of rows 3430 . n - number of columns 3431 . nz - number of nonzero blocks per block row (same for all rows) 3432 - nnz - array containing the number of nonzero blocks in the various block rows 3433 (possibly different for each block row) or PETSC_NULL 3434 3435 Output Parameter: 3436 . A - the matrix 3437 3438 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3439 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3440 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3441 3442 Options Database Keys: 3443 . -mat_no_unroll - uses code that does not unroll the loops in the 3444 block calculations (much slower) 3445 . -mat_block_size - size of the blocks to use 3446 3447 Level: intermediate 3448 3449 Notes: 3450 The number of rows and columns must be divisible by blocksize. 3451 3452 If the nnz parameter is given then the nz parameter is ignored 3453 3454 A nonzero block is any block that as 1 or more nonzeros in it 3455 3456 The block AIJ format is fully compatible with standard Fortran 77 3457 storage. That is, the stored row and column indices can begin at 3458 either one (as in Fortran) or zero. See the users' manual for details. 3459 3460 Specify the preallocated storage with either nz or nnz (not both). 3461 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3462 allocation. For additional details, see the users manual chapter on 3463 matrices. 3464 3465 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3466 @*/ 3467 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3468 { 3469 PetscErrorCode ierr; 3470 3471 PetscFunctionBegin; 3472 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3473 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3474 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3475 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3476 PetscFunctionReturn(0); 3477 } 3478 3479 #undef __FUNCT__ 3480 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3481 /*@C 3482 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3483 per row in the matrix. For good matrix assembly performance the 3484 user should preallocate the matrix storage by setting the parameter nz 3485 (or the array nnz). By setting these parameters accurately, performance 3486 during matrix assembly can be increased by more than a factor of 50. 3487 3488 Collective on MPI_Comm 3489 3490 Input Parameters: 3491 + A - the matrix 3492 . bs - size of block 3493 . nz - number of block nonzeros per block row (same for all rows) 3494 - nnz - array containing the number of block nonzeros in the various block rows 3495 (possibly different for each block row) or PETSC_NULL 3496 3497 Options Database Keys: 3498 . -mat_no_unroll - uses code that does not unroll the loops in the 3499 block calculations (much slower) 3500 . -mat_block_size - size of the blocks to use 3501 3502 Level: intermediate 3503 3504 Notes: 3505 If the nnz parameter is given then the nz parameter is ignored 3506 3507 You can call MatGetInfo() to get information on how effective the preallocation was; 3508 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3509 You can also run with the option -info and look for messages with the string 3510 malloc in them to see if additional memory allocation was needed. 3511 3512 The block AIJ format is fully compatible with standard Fortran 77 3513 storage. That is, the stored row and column indices can begin at 3514 either one (as in Fortran) or zero. See the users' manual for details. 3515 3516 Specify the preallocated storage with either nz or nnz (not both). 3517 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3518 allocation. For additional details, see the users manual chapter on 3519 matrices. 3520 3521 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3522 @*/ 3523 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3524 { 3525 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3526 3527 PetscFunctionBegin; 3528 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3529 if (f) { 3530 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3531 } 3532 PetscFunctionReturn(0); 3533 } 3534 3535 #undef __FUNCT__ 3536 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3537 /*@C 3538 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3539 (the default sequential PETSc format). 3540 3541 Collective on MPI_Comm 3542 3543 Input Parameters: 3544 + A - the matrix 3545 . i - the indices into j for the start of each local row (starts with zero) 3546 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3547 - v - optional values in the matrix 3548 3549 Level: developer 3550 3551 .keywords: matrix, aij, compressed row, sparse 3552 3553 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3554 @*/ 3555 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3556 { 3557 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3558 3559 PetscFunctionBegin; 3560 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3561 if (f) { 3562 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3563 } 3564 PetscFunctionReturn(0); 3565 } 3566 3567 3568 #undef __FUNCT__ 3569 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3570 /*@ 3571 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3572 (upper triangular entries in CSR format) provided by the user. 3573 3574 Collective on MPI_Comm 3575 3576 Input Parameters: 3577 + comm - must be an MPI communicator of size 1 3578 . bs - size of block 3579 . m - number of rows 3580 . n - number of columns 3581 . i - row indices 3582 . j - column indices 3583 - a - matrix values 3584 3585 Output Parameter: 3586 . mat - the matrix 3587 3588 Level: intermediate 3589 3590 Notes: 3591 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3592 once the matrix is destroyed 3593 3594 You cannot set new nonzero locations into this matrix, that will generate an error. 3595 3596 The i and j indices are 0 based 3597 3598 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3599 3600 @*/ 3601 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3602 { 3603 PetscErrorCode ierr; 3604 PetscInt ii; 3605 Mat_SeqBAIJ *baij; 3606 3607 PetscFunctionBegin; 3608 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3609 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3610 3611 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3612 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3613 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3614 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3615 baij = (Mat_SeqBAIJ*)(*mat)->data; 3616 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3617 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3618 3619 baij->i = i; 3620 baij->j = j; 3621 baij->a = a; 3622 baij->singlemalloc = PETSC_FALSE; 3623 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3624 baij->free_a = PETSC_FALSE; 3625 baij->free_ij = PETSC_FALSE; 3626 3627 for (ii=0; ii<m; ii++) { 3628 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3629 #if defined(PETSC_USE_DEBUG) 3630 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3631 #endif 3632 } 3633 #if defined(PETSC_USE_DEBUG) 3634 for (ii=0; ii<baij->i[m]; ii++) { 3635 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3636 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3637 } 3638 #endif 3639 3640 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3641 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3642 PetscFunctionReturn(0); 3643 } 3644