1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 1: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 1*diag_offset[i]; 68 diag[0] = odiag[0]; 69 mdiag[0] = odiag[0]; 70 diag[0] = 1.0 / (diag[0] + shift); 71 diag += 1; 72 mdiag += 1; 73 } 74 break; 75 case 2: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 4*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 81 diag += 4; 82 mdiag += 4; 83 } 84 break; 85 case 3: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 9*diag_offset[i]; 88 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 89 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 90 diag[8] = odiag[8]; 91 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 92 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 93 mdiag[8] = odiag[8]; 94 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 95 diag += 9; 96 mdiag += 9; 97 } 98 break; 99 case 4: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 16*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 105 diag += 16; 106 mdiag += 16; 107 } 108 break; 109 case 5: 110 for (i=0; i<mbs; i++) { 111 odiag = v + 25*diag_offset[i]; 112 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 115 diag += 25; 116 mdiag += 25; 117 } 118 break; 119 case 6: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 36*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 125 diag += 36; 126 mdiag += 36; 127 } 128 break; 129 default: 130 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 EXTERN_C_END 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 139 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 140 { 141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 142 PetscScalar *x,x1,s1; 143 const PetscScalar *b; 144 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 145 PetscErrorCode ierr; 146 PetscInt m = a->mbs,i,i2,nz,j; 147 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 148 149 PetscFunctionBegin; 150 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 151 its = its*lits; 152 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 153 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 154 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 155 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 156 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 157 158 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 159 160 diag = a->diag; 161 idiag = a->idiag; 162 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 163 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 164 165 if (flag & SOR_ZERO_INITIAL_GUESS) { 166 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 167 x[0] = b[0]*idiag[0]; 168 i2 = 1; 169 idiag += 1; 170 for (i=1; i<m; i++) { 171 v = aa + ai[i]; 172 vi = aj + ai[i]; 173 nz = diag[i] - ai[i]; 174 s1 = b[i2]; 175 for (j=0; j<nz; j++) { 176 s1 -= v[j]*x[vi[j]]; 177 } 178 x[i2] = idiag[0]*s1; 179 idiag += 1; 180 i2 += 1; 181 } 182 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 183 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 184 } 185 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 186 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 187 i2 = 0; 188 mdiag = a->idiag+a->mbs; 189 for (i=0; i<m; i++) { 190 x1 = x[i2]; 191 x[i2] = mdiag[0]*x1; 192 mdiag += 1; 193 i2 += 1; 194 } 195 ierr = PetscLogFlops(m);CHKERRQ(ierr); 196 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 197 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 198 } 199 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 200 idiag = a->idiag+a->mbs - 1; 201 i2 = m - 1; 202 x1 = x[i2]; 203 x[i2] = idiag[0]*x1; 204 idiag -= 1; 205 i2 -= 1; 206 for (i=m-2; i>=0; i--) { 207 v = aa + (diag[i]+1); 208 vi = aj + diag[i] + 1; 209 nz = ai[i+1] - diag[i] - 1; 210 s1 = x[i2]; 211 for (j=0; j<nz; j++) { 212 s1 -= v[j]*x[vi[j]]; 213 } 214 x[i2] = idiag[0]*s1; 215 idiag -= 1; 216 i2 -= 1; 217 } 218 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 219 } 220 } else { 221 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 222 } 223 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 224 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 #undef __FUNCT__ 229 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 230 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 231 { 232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 233 PetscScalar *x,x1,x2,s1,s2; 234 const PetscScalar *b; 235 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 236 PetscErrorCode ierr; 237 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 238 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 239 240 PetscFunctionBegin; 241 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 242 its = its*lits; 243 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 244 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 245 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 246 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 247 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 248 249 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 250 251 diag = a->diag; 252 idiag = a->idiag; 253 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 254 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 255 256 if (flag & SOR_ZERO_INITIAL_GUESS) { 257 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 258 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 259 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 260 i2 = 2; 261 idiag += 4; 262 for (i=1; i<m; i++) { 263 v = aa + 4*ai[i]; 264 vi = aj + ai[i]; 265 nz = diag[i] - ai[i]; 266 s1 = b[i2]; s2 = b[i2+1]; 267 for (j=0; j<nz; j++) { 268 idx = 2*vi[j]; 269 it = 4*j; 270 x1 = x[idx]; x2 = x[1+idx]; 271 s1 -= v[it]*x1 + v[it+2]*x2; 272 s2 -= v[it+1]*x1 + v[it+3]*x2; 273 } 274 x[i2] = idiag[0]*s1 + idiag[2]*s2; 275 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 276 idiag += 4; 277 i2 += 2; 278 } 279 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 280 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 281 } 282 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 283 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 284 i2 = 0; 285 mdiag = a->idiag+4*a->mbs; 286 for (i=0; i<m; i++) { 287 x1 = x[i2]; x2 = x[i2+1]; 288 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 289 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 290 mdiag += 4; 291 i2 += 2; 292 } 293 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 294 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 295 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 296 } 297 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 298 idiag = a->idiag+4*a->mbs - 4; 299 i2 = 2*m - 2; 300 x1 = x[i2]; x2 = x[i2+1]; 301 x[i2] = idiag[0]*x1 + idiag[2]*x2; 302 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 303 idiag -= 4; 304 i2 -= 2; 305 for (i=m-2; i>=0; i--) { 306 v = aa + 4*(diag[i]+1); 307 vi = aj + diag[i] + 1; 308 nz = ai[i+1] - diag[i] - 1; 309 s1 = x[i2]; s2 = x[i2+1]; 310 for (j=0; j<nz; j++) { 311 idx = 2*vi[j]; 312 it = 4*j; 313 x1 = x[idx]; x2 = x[1+idx]; 314 s1 -= v[it]*x1 + v[it+2]*x2; 315 s2 -= v[it+1]*x1 + v[it+3]*x2; 316 } 317 x[i2] = idiag[0]*s1 + idiag[2]*s2; 318 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 319 idiag -= 4; 320 i2 -= 2; 321 } 322 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 323 } 324 } else { 325 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 326 } 327 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 328 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 329 PetscFunctionReturn(0); 330 } 331 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 334 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 335 { 336 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 337 PetscScalar *x,x1,x2,x3,s1,s2,s3; 338 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 339 const PetscScalar *b; 340 PetscErrorCode ierr; 341 PetscInt m = a->mbs,i,i2,nz,idx; 342 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 343 344 PetscFunctionBegin; 345 its = its*lits; 346 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 347 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 348 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 349 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 350 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 351 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 352 353 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 354 355 diag = a->diag; 356 idiag = a->idiag; 357 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 358 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 359 360 if (flag & SOR_ZERO_INITIAL_GUESS) { 361 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 362 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 363 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 364 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 365 i2 = 3; 366 idiag += 9; 367 for (i=1; i<m; i++) { 368 v = aa + 9*ai[i]; 369 vi = aj + ai[i]; 370 nz = diag[i] - ai[i]; 371 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 372 while (nz--) { 373 idx = 3*(*vi++); 374 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 375 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 376 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 377 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 378 v += 9; 379 } 380 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 381 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 382 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 383 idiag += 9; 384 i2 += 3; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 388 } 389 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 390 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 391 i2 = 0; 392 mdiag = a->idiag+9*a->mbs; 393 for (i=0; i<m; i++) { 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 396 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 397 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 398 mdiag += 9; 399 i2 += 3; 400 } 401 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 402 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 403 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 404 } 405 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 406 idiag = a->idiag+9*a->mbs - 9; 407 i2 = 3*m - 3; 408 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 409 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 410 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 411 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 412 idiag -= 9; 413 i2 -= 3; 414 for (i=m-2; i>=0; i--) { 415 v = aa + 9*(diag[i]+1); 416 vi = aj + diag[i] + 1; 417 nz = ai[i+1] - diag[i] - 1; 418 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 419 while (nz--) { 420 idx = 3*(*vi++); 421 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 422 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 423 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 424 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 425 v += 9; 426 } 427 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 428 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 429 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 430 idiag -= 9; 431 i2 -= 3; 432 } 433 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 445 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 449 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 450 const PetscScalar *b; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 457 its = its*lits; 458 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 459 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 460 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 461 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 462 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 463 464 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 465 466 diag = a->diag; 467 idiag = a->idiag; 468 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 469 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 470 471 if (flag & SOR_ZERO_INITIAL_GUESS) { 472 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 473 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 474 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 475 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 476 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 477 i2 = 4; 478 idiag += 16; 479 for (i=1; i<m; i++) { 480 v = aa + 16*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 484 while (nz--) { 485 idx = 4*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 487 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 488 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 489 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 490 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 491 v += 16; 492 } 493 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 494 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 495 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 496 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 497 idiag += 16; 498 i2 += 4; 499 } 500 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 501 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 502 } 503 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 504 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 505 i2 = 0; 506 mdiag = a->idiag+16*a->mbs; 507 for (i=0; i<m; i++) { 508 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 509 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 510 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 511 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 512 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 513 mdiag += 16; 514 i2 += 4; 515 } 516 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 517 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 518 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 519 } 520 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 521 idiag = a->idiag+16*a->mbs - 16; 522 i2 = 4*m - 4; 523 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 524 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 525 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 526 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 527 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 528 idiag -= 16; 529 i2 -= 4; 530 for (i=m-2; i>=0; i--) { 531 v = aa + 16*(diag[i]+1); 532 vi = aj + diag[i] + 1; 533 nz = ai[i+1] - diag[i] - 1; 534 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 535 while (nz--) { 536 idx = 4*(*vi++); 537 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 538 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 539 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 540 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 541 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 542 v += 16; 543 } 544 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 545 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 546 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 547 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 548 idiag -= 16; 549 i2 -= 4; 550 } 551 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 552 } 553 } else { 554 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 555 } 556 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 557 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 563 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 564 { 565 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 566 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 567 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 568 const PetscScalar *b; 569 PetscErrorCode ierr; 570 PetscInt m = a->mbs,i,i2,nz,idx; 571 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 572 573 PetscFunctionBegin; 574 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 575 its = its*lits; 576 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 577 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 578 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 579 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 580 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 581 582 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 583 584 diag = a->diag; 585 idiag = a->idiag; 586 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 587 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 588 589 if (flag & SOR_ZERO_INITIAL_GUESS) { 590 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 591 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 592 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 593 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 594 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 595 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 596 i2 = 5; 597 idiag += 25; 598 for (i=1; i<m; i++) { 599 v = aa + 25*ai[i]; 600 vi = aj + ai[i]; 601 nz = diag[i] - ai[i]; 602 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 603 while (nz--) { 604 idx = 5*(*vi++); 605 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 606 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 607 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 608 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 609 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 610 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 611 v += 25; 612 } 613 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 614 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 615 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 616 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 617 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 618 idiag += 25; 619 i2 += 5; 620 } 621 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 622 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 623 } 624 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 625 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 626 i2 = 0; 627 mdiag = a->idiag+25*a->mbs; 628 for (i=0; i<m; i++) { 629 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 630 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 631 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 632 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 633 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 634 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 635 mdiag += 25; 636 i2 += 5; 637 } 638 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 639 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 640 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 641 } 642 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 643 idiag = a->idiag+25*a->mbs - 25; 644 i2 = 5*m - 5; 645 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 646 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 647 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 648 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 649 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 650 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 651 idiag -= 25; 652 i2 -= 5; 653 for (i=m-2; i>=0; i--) { 654 v = aa + 25*(diag[i]+1); 655 vi = aj + diag[i] + 1; 656 nz = ai[i+1] - diag[i] - 1; 657 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 658 while (nz--) { 659 idx = 5*(*vi++); 660 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 661 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 662 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 663 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 664 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 665 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 666 v += 25; 667 } 668 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 669 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 670 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 671 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 672 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 673 idiag -= 25; 674 i2 -= 5; 675 } 676 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 677 } 678 } else { 679 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 680 } 681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 682 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 688 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 692 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 693 const PetscScalar *b; 694 PetscErrorCode ierr; 695 PetscInt m = a->mbs,i,i2,nz,idx; 696 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 697 698 PetscFunctionBegin; 699 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 700 its = its*lits; 701 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 702 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 703 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 704 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 705 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 706 707 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 708 709 diag = a->diag; 710 idiag = a->idiag; 711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 712 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 713 714 if (flag & SOR_ZERO_INITIAL_GUESS) { 715 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 716 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 717 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 718 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 719 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 720 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 721 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 722 i2 = 6; 723 idiag += 36; 724 for (i=1; i<m; i++) { 725 v = aa + 36*ai[i]; 726 vi = aj + ai[i]; 727 nz = diag[i] - ai[i]; 728 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 729 while (nz--) { 730 idx = 6*(*vi++); 731 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 732 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 733 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 734 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 735 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 736 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 737 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 738 v += 36; 739 } 740 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 741 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 742 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 743 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 744 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 745 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 746 idiag += 36; 747 i2 += 6; 748 } 749 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 750 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 751 } 752 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 753 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 754 i2 = 0; 755 mdiag = a->idiag+36*a->mbs; 756 for (i=0; i<m; i++) { 757 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 758 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 759 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 760 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 761 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 762 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 763 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 764 mdiag += 36; 765 i2 += 6; 766 } 767 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 768 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 769 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 770 } 771 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 772 idiag = a->idiag+36*a->mbs - 36; 773 i2 = 6*m - 6; 774 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 775 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 776 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 777 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 778 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 779 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 780 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 781 idiag -= 36; 782 i2 -= 6; 783 for (i=m-2; i>=0; i--) { 784 v = aa + 36*(diag[i]+1); 785 vi = aj + diag[i] + 1; 786 nz = ai[i+1] - diag[i] - 1; 787 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 788 while (nz--) { 789 idx = 6*(*vi++); 790 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 791 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 792 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 793 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 794 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 795 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 796 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 797 v += 36; 798 } 799 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 800 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 801 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 802 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 803 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 804 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 805 idiag -= 36; 806 i2 -= 6; 807 } 808 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 809 } 810 } else { 811 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 812 } 813 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 814 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 815 PetscFunctionReturn(0); 816 } 817 818 #undef __FUNCT__ 819 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 820 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 821 { 822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 823 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 824 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 825 const PetscScalar *b; 826 PetscErrorCode ierr; 827 PetscInt m = a->mbs,i,i2,nz,idx; 828 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 829 830 PetscFunctionBegin; 831 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 832 its = its*lits; 833 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 834 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 835 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 836 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 837 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 838 839 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 840 841 diag = a->diag; 842 idiag = a->idiag; 843 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 844 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 845 846 if (flag & SOR_ZERO_INITIAL_GUESS) { 847 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 848 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 849 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 850 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 851 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 852 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 853 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 854 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 855 i2 = 7; 856 idiag += 49; 857 for (i=1; i<m; i++) { 858 v = aa + 49*ai[i]; 859 vi = aj + ai[i]; 860 nz = diag[i] - ai[i]; 861 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 862 while (nz--) { 863 idx = 7*(*vi++); 864 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 865 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 866 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 867 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 868 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 869 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 870 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 871 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 872 v += 49; 873 } 874 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 875 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 876 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 877 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 878 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 879 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 880 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 881 idiag += 49; 882 i2 += 7; 883 } 884 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 885 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 886 } 887 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 888 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 889 i2 = 0; 890 mdiag = a->idiag+49*a->mbs; 891 for (i=0; i<m; i++) { 892 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 893 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 894 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 895 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 896 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 897 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 898 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 899 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 900 mdiag += 36; 901 i2 += 6; 902 } 903 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 904 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 905 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 906 } 907 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 908 idiag = a->idiag+49*a->mbs - 49; 909 i2 = 7*m - 7; 910 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 911 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 912 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 913 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 914 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 915 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 916 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 917 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 918 idiag -= 49; 919 i2 -= 7; 920 for (i=m-2; i>=0; i--) { 921 v = aa + 49*(diag[i]+1); 922 vi = aj + diag[i] + 1; 923 nz = ai[i+1] - diag[i] - 1; 924 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 925 while (nz--) { 926 idx = 7*(*vi++); 927 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 928 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 929 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 930 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 931 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 932 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 933 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 934 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 935 v += 49; 936 } 937 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 938 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 939 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 940 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 941 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 942 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 943 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 944 idiag -= 49; 945 i2 -= 7; 946 } 947 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 948 } 949 } else { 950 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 951 } 952 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 953 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 /* 958 Special version for direct calls from Fortran (Used in PETSc-fun3d) 959 */ 960 #if defined(PETSC_HAVE_FORTRAN_CAPS) 961 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 962 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 963 #define matsetvaluesblocked4_ matsetvaluesblocked4 964 #endif 965 966 EXTERN_C_BEGIN 967 #undef __FUNCT__ 968 #define __FUNCT__ "matsetvaluesblocked4_" 969 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 970 { 971 Mat A = *AA; 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 974 PetscInt *ai=a->i,*ailen=a->ilen; 975 PetscInt *aj=a->j,stepval,lastcol = -1; 976 const PetscScalar *value = v; 977 MatScalar *ap,*aa = a->a,*bap; 978 979 PetscFunctionBegin; 980 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 981 stepval = (n-1)*4; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; 984 rp = aj + ai[row]; 985 ap = aa + 16*ai[row]; 986 nrow = ailen[row]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; 991 if (col <= lastcol) low = 0; else high = nrow; 992 lastcol = col; 993 value = v + k*(stepval+4 + l)*4; 994 while (high-low > 7) { 995 t = (low+high)/2; 996 if (rp[t] > col) high = t; 997 else low = t; 998 } 999 for (i=low; i<high; i++) { 1000 if (rp[i] > col) break; 1001 if (rp[i] == col) { 1002 bap = ap + 16*i; 1003 for (ii=0; ii<4; ii++,value+=stepval) { 1004 for (jj=ii; jj<16; jj+=4) { 1005 bap[jj] += *value++; 1006 } 1007 } 1008 goto noinsert2; 1009 } 1010 } 1011 N = nrow++ - 1; 1012 high++; /* added new column index thus must search to one higher than before */ 1013 /* shift up all the later entries in this row */ 1014 for (ii=N; ii>=i; ii--) { 1015 rp[ii+1] = rp[ii]; 1016 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1017 } 1018 if (N >= i) { 1019 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1020 } 1021 rp[i] = col; 1022 bap = ap + 16*i; 1023 for (ii=0; ii<4; ii++,value+=stepval) { 1024 for (jj=ii; jj<16; jj+=4) { 1025 bap[jj] = *value++; 1026 } 1027 } 1028 noinsert2:; 1029 low = i; 1030 } 1031 ailen[row] = nrow; 1032 } 1033 PetscFunctionReturnVoid(); 1034 } 1035 EXTERN_C_END 1036 1037 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1038 #define matsetvalues4_ MATSETVALUES4 1039 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1040 #define matsetvalues4_ matsetvalues4 1041 #endif 1042 1043 EXTERN_C_BEGIN 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatSetValues4_" 1046 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1047 { 1048 Mat A = *AA; 1049 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1050 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1051 PetscInt *ai=a->i,*ailen=a->ilen; 1052 PetscInt *aj=a->j,brow,bcol; 1053 PetscInt ridx,cidx,lastcol = -1; 1054 MatScalar *ap,value,*aa=a->a,*bap; 1055 1056 PetscFunctionBegin; 1057 for (k=0; k<m; k++) { /* loop over added rows */ 1058 row = im[k]; brow = row/4; 1059 rp = aj + ai[brow]; 1060 ap = aa + 16*ai[brow]; 1061 nrow = ailen[brow]; 1062 low = 0; 1063 high = nrow; 1064 for (l=0; l<n; l++) { /* loop over added columns */ 1065 col = in[l]; bcol = col/4; 1066 ridx = row % 4; cidx = col % 4; 1067 value = v[l + k*n]; 1068 if (col <= lastcol) low = 0; else high = nrow; 1069 lastcol = col; 1070 while (high-low > 7) { 1071 t = (low+high)/2; 1072 if (rp[t] > bcol) high = t; 1073 else low = t; 1074 } 1075 for (i=low; i<high; i++) { 1076 if (rp[i] > bcol) break; 1077 if (rp[i] == bcol) { 1078 bap = ap + 16*i + 4*cidx + ridx; 1079 *bap += value; 1080 goto noinsert1; 1081 } 1082 } 1083 N = nrow++ - 1; 1084 high++; /* added new column thus must search to one higher than before */ 1085 /* shift up all the later entries in this row */ 1086 for (ii=N; ii>=i; ii--) { 1087 rp[ii+1] = rp[ii]; 1088 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1089 } 1090 if (N>=i) { 1091 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1092 } 1093 rp[i] = bcol; 1094 ap[16*i + 4*cidx + ridx] = value; 1095 noinsert1:; 1096 low = i; 1097 } 1098 ailen[brow] = nrow; 1099 } 1100 PetscFunctionReturnVoid(); 1101 } 1102 EXTERN_C_END 1103 1104 /* 1105 Checks for missing diagonals 1106 */ 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1109 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1110 { 1111 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 PetscInt *diag,*jj = a->j,i; 1114 1115 PetscFunctionBegin; 1116 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1117 *missing = PETSC_FALSE; 1118 if (A->rmap->n > 0 && !jj) { 1119 *missing = PETSC_TRUE; 1120 if (d) *d = 0; 1121 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1122 } else { 1123 diag = a->diag; 1124 for (i=0; i<a->mbs; i++) { 1125 if (jj[diag[i]] != i) { 1126 *missing = PETSC_TRUE; 1127 if (d) *d = i; 1128 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1129 } 1130 } 1131 } 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1137 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1138 { 1139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1140 PetscErrorCode ierr; 1141 PetscInt i,j,m = a->mbs; 1142 1143 PetscFunctionBegin; 1144 if (!a->diag) { 1145 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1146 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1147 a->free_diag = PETSC_TRUE; 1148 } 1149 for (i=0; i<m; i++) { 1150 a->diag[i] = a->i[i+1]; 1151 for (j=a->i[i]; j<a->i[i+1]; j++) { 1152 if (a->j[j] == i) { 1153 a->diag[i] = j; 1154 break; 1155 } 1156 } 1157 } 1158 PetscFunctionReturn(0); 1159 } 1160 1161 1162 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1166 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1167 { 1168 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1169 PetscErrorCode ierr; 1170 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1171 PetscInt *tia, *tja; 1172 1173 PetscFunctionBegin; 1174 *nn = n; 1175 if (!ia) PetscFunctionReturn(0); 1176 if (symmetric) { 1177 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1178 nz = tia[n]; 1179 } else { 1180 tia = a->i; tja = a->j; 1181 } 1182 1183 if (!blockcompressed && bs > 1) { 1184 (*nn) *= bs; 1185 nbs = bs; 1186 /* malloc & create the natural set of indices */ 1187 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1188 if (n) { 1189 (*ia)[0] = 0; 1190 for (j=1; j<bs; j++) { 1191 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1192 } 1193 } 1194 1195 for (i=1; i<n; i++) { 1196 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1197 for (j=1; j<bs; j++) { 1198 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1199 } 1200 } 1201 if (n) { 1202 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1203 } 1204 1205 if (ja) { 1206 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1207 cnt = 0; 1208 for (i=0; i<n; i++) { 1209 for (j=0; j<bs; j++) { 1210 for (k=tia[i]; k<tia[i+1]; k++) { 1211 for (l=0; l<bs; l++) { 1212 (*ja)[cnt++] = bs*tja[k] + l; 1213 } 1214 } 1215 } 1216 } 1217 } 1218 1219 n *= bs; 1220 nz *= bs*bs; 1221 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1222 ierr = PetscFree(tia);CHKERRQ(ierr); 1223 ierr = PetscFree(tja);CHKERRQ(ierr); 1224 } 1225 } else if (oshift == 1) { 1226 if (symmetric) { 1227 PetscInt nz = tia[A->rmap->n/bs]; 1228 /* add 1 to i and j indices */ 1229 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1230 *ia = tia; 1231 if (ja) { 1232 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1233 *ja = tja; 1234 } 1235 } else { 1236 PetscInt nz = a->i[A->rmap->n/bs]; 1237 /* malloc space and add 1 to i and j indices */ 1238 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1239 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1240 if (ja) { 1241 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1242 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1243 } 1244 } 1245 } else { 1246 *ia = tia; 1247 if (ja) *ja = tja; 1248 } 1249 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1255 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1256 { 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 if (!ia) PetscFunctionReturn(0); 1261 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1262 ierr = PetscFree(*ia);CHKERRQ(ierr); 1263 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1264 } 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1270 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1278 #endif 1279 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1280 if (a->row) { 1281 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1282 } 1283 if (a->col) { 1284 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1285 } 1286 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1287 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1288 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1289 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1290 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1291 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1292 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1293 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1294 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1295 1296 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1297 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1298 ierr = PetscFree(a);CHKERRQ(ierr); 1299 1300 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1314 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1315 { 1316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1317 PetscErrorCode ierr; 1318 1319 PetscFunctionBegin; 1320 switch (op) { 1321 case MAT_ROW_ORIENTED: 1322 a->roworiented = flg; 1323 break; 1324 case MAT_KEEP_NONZERO_PATTERN: 1325 a->keepnonzeropattern = flg; 1326 break; 1327 case MAT_NEW_NONZERO_LOCATIONS: 1328 a->nonew = (flg ? 0 : 1); 1329 break; 1330 case MAT_NEW_NONZERO_LOCATION_ERR: 1331 a->nonew = (flg ? -1 : 0); 1332 break; 1333 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1334 a->nonew = (flg ? -2 : 0); 1335 break; 1336 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1337 a->nounused = (flg ? -1 : 0); 1338 break; 1339 case MAT_NEW_DIAGONALS: 1340 case MAT_IGNORE_OFF_PROC_ENTRIES: 1341 case MAT_USE_HASH_TABLE: 1342 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1343 break; 1344 case MAT_SYMMETRIC: 1345 case MAT_STRUCTURALLY_SYMMETRIC: 1346 case MAT_HERMITIAN: 1347 case MAT_SYMMETRY_ETERNAL: 1348 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1349 break; 1350 default: 1351 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1358 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1359 { 1360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1361 PetscErrorCode ierr; 1362 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1363 MatScalar *aa,*aa_i; 1364 PetscScalar *v_i; 1365 1366 PetscFunctionBegin; 1367 bs = A->rmap->bs; 1368 ai = a->i; 1369 aj = a->j; 1370 aa = a->a; 1371 bs2 = a->bs2; 1372 1373 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1374 1375 bn = row/bs; /* Block number */ 1376 bp = row % bs; /* Block Position */ 1377 M = ai[bn+1] - ai[bn]; 1378 *nz = bs*M; 1379 1380 if (v) { 1381 *v = 0; 1382 if (*nz) { 1383 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1384 for (i=0; i<M; i++) { /* for each block in the block row */ 1385 v_i = *v + i*bs; 1386 aa_i = aa + bs2*(ai[bn] + i); 1387 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1388 } 1389 } 1390 } 1391 1392 if (idx) { 1393 *idx = 0; 1394 if (*nz) { 1395 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1396 for (i=0; i<M; i++) { /* for each block in the block row */ 1397 idx_i = *idx + i*bs; 1398 itmp = bs*aj[ai[bn] + i]; 1399 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1400 } 1401 } 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1408 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1409 { 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1414 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1415 PetscFunctionReturn(0); 1416 } 1417 1418 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1422 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1423 { 1424 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1425 Mat C; 1426 PetscErrorCode ierr; 1427 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1428 PetscInt *rows,*cols,bs2=a->bs2; 1429 MatScalar *array; 1430 1431 PetscFunctionBegin; 1432 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1433 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1434 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1435 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1436 1437 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1438 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1439 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1440 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1441 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1442 ierr = PetscFree(col);CHKERRQ(ierr); 1443 } else { 1444 C = *B; 1445 } 1446 1447 array = a->a; 1448 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1449 for (i=0; i<mbs; i++) { 1450 cols[0] = i*bs; 1451 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1452 len = ai[i+1] - ai[i]; 1453 for (j=0; j<len; j++) { 1454 rows[0] = (*aj++)*bs; 1455 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1456 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1457 array += bs2; 1458 } 1459 } 1460 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1461 1462 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1464 1465 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1466 *B = C; 1467 } else { 1468 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1475 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1476 { 1477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1478 PetscErrorCode ierr; 1479 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1480 int fd; 1481 PetscScalar *aa; 1482 FILE *file; 1483 1484 PetscFunctionBegin; 1485 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1486 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1487 col_lens[0] = MAT_FILE_COOKIE; 1488 1489 col_lens[1] = A->rmap->N; 1490 col_lens[2] = A->cmap->n; 1491 col_lens[3] = a->nz*bs2; 1492 1493 /* store lengths of each row and write (including header) to file */ 1494 count = 0; 1495 for (i=0; i<a->mbs; i++) { 1496 for (j=0; j<bs; j++) { 1497 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1498 } 1499 } 1500 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1501 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1502 1503 /* store column indices (zero start index) */ 1504 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1505 count = 0; 1506 for (i=0; i<a->mbs; i++) { 1507 for (j=0; j<bs; j++) { 1508 for (k=a->i[i]; k<a->i[i+1]; k++) { 1509 for (l=0; l<bs; l++) { 1510 jj[count++] = bs*a->j[k] + l; 1511 } 1512 } 1513 } 1514 } 1515 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1516 ierr = PetscFree(jj);CHKERRQ(ierr); 1517 1518 /* store nonzero values */ 1519 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1520 count = 0; 1521 for (i=0; i<a->mbs; i++) { 1522 for (j=0; j<bs; j++) { 1523 for (k=a->i[i]; k<a->i[i+1]; k++) { 1524 for (l=0; l<bs; l++) { 1525 aa[count++] = a->a[bs2*k + l*bs + j]; 1526 } 1527 } 1528 } 1529 } 1530 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1531 ierr = PetscFree(aa);CHKERRQ(ierr); 1532 1533 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1534 if (file) { 1535 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1542 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1543 { 1544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1545 PetscErrorCode ierr; 1546 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1547 PetscViewerFormat format; 1548 1549 PetscFunctionBegin; 1550 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1551 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1552 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1553 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1554 Mat aij; 1555 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1556 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1557 ierr = MatDestroy(aij);CHKERRQ(ierr); 1558 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1559 PetscFunctionReturn(0); 1560 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1561 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1562 for (i=0; i<a->mbs; i++) { 1563 for (j=0; j<bs; j++) { 1564 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1565 for (k=a->i[i]; k<a->i[i+1]; k++) { 1566 for (l=0; l<bs; l++) { 1567 #if defined(PETSC_USE_COMPLEX) 1568 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1570 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1573 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1575 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1576 } 1577 #else 1578 if (a->a[bs2*k + l*bs + j] != 0.0) { 1579 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1580 } 1581 #endif 1582 } 1583 } 1584 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1585 } 1586 } 1587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1588 } else { 1589 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1590 for (i=0; i<a->mbs; i++) { 1591 for (j=0; j<bs; j++) { 1592 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1593 for (k=a->i[i]; k<a->i[i+1]; k++) { 1594 for (l=0; l<bs; l++) { 1595 #if defined(PETSC_USE_COMPLEX) 1596 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1597 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1598 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1599 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1600 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1601 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1602 } else { 1603 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1604 } 1605 #else 1606 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1607 #endif 1608 } 1609 } 1610 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1611 } 1612 } 1613 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1614 } 1615 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #undef __FUNCT__ 1620 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1621 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1622 { 1623 Mat A = (Mat) Aa; 1624 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1625 PetscErrorCode ierr; 1626 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1627 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1628 MatScalar *aa; 1629 PetscViewer viewer; 1630 1631 PetscFunctionBegin; 1632 1633 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1634 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1635 1636 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1637 1638 /* loop over matrix elements drawing boxes */ 1639 color = PETSC_DRAW_BLUE; 1640 for (i=0,row=0; i<mbs; i++,row+=bs) { 1641 for (j=a->i[i]; j<a->i[i+1]; j++) { 1642 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1643 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1644 aa = a->a + j*bs2; 1645 for (k=0; k<bs; k++) { 1646 for (l=0; l<bs; l++) { 1647 if (PetscRealPart(*aa++) >= 0.) continue; 1648 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1649 } 1650 } 1651 } 1652 } 1653 color = PETSC_DRAW_CYAN; 1654 for (i=0,row=0; i<mbs; i++,row+=bs) { 1655 for (j=a->i[i]; j<a->i[i+1]; j++) { 1656 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1657 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1658 aa = a->a + j*bs2; 1659 for (k=0; k<bs; k++) { 1660 for (l=0; l<bs; l++) { 1661 if (PetscRealPart(*aa++) != 0.) continue; 1662 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1663 } 1664 } 1665 } 1666 } 1667 1668 color = PETSC_DRAW_RED; 1669 for (i=0,row=0; i<mbs; i++,row+=bs) { 1670 for (j=a->i[i]; j<a->i[i+1]; j++) { 1671 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1672 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1673 aa = a->a + j*bs2; 1674 for (k=0; k<bs; k++) { 1675 for (l=0; l<bs; l++) { 1676 if (PetscRealPart(*aa++) <= 0.) continue; 1677 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1678 } 1679 } 1680 } 1681 } 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1687 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1688 { 1689 PetscErrorCode ierr; 1690 PetscReal xl,yl,xr,yr,w,h; 1691 PetscDraw draw; 1692 PetscTruth isnull; 1693 1694 PetscFunctionBegin; 1695 1696 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1697 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1698 1699 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1700 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1701 xr += w; yr += h; xl = -w; yl = -h; 1702 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1703 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatView_SeqBAIJ" 1710 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1711 { 1712 PetscErrorCode ierr; 1713 PetscTruth iascii,isbinary,isdraw; 1714 1715 PetscFunctionBegin; 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1717 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1718 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1719 if (iascii){ 1720 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1721 } else if (isbinary) { 1722 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1723 } else if (isdraw) { 1724 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1725 } else { 1726 Mat B; 1727 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1728 ierr = MatView(B,viewer);CHKERRQ(ierr); 1729 ierr = MatDestroy(B);CHKERRQ(ierr); 1730 } 1731 PetscFunctionReturn(0); 1732 } 1733 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1737 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1738 { 1739 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1740 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1741 PetscInt *ai = a->i,*ailen = a->ilen; 1742 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1743 MatScalar *ap,*aa = a->a; 1744 1745 PetscFunctionBegin; 1746 for (k=0; k<m; k++) { /* loop over rows */ 1747 row = im[k]; brow = row/bs; 1748 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1749 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1750 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1751 nrow = ailen[brow]; 1752 for (l=0; l<n; l++) { /* loop over columns */ 1753 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1754 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1755 col = in[l] ; 1756 bcol = col/bs; 1757 cidx = col%bs; 1758 ridx = row%bs; 1759 high = nrow; 1760 low = 0; /* assume unsorted */ 1761 while (high-low > 5) { 1762 t = (low+high)/2; 1763 if (rp[t] > bcol) high = t; 1764 else low = t; 1765 } 1766 for (i=low; i<high; i++) { 1767 if (rp[i] > bcol) break; 1768 if (rp[i] == bcol) { 1769 *v++ = ap[bs2*i+bs*cidx+ridx]; 1770 goto finished; 1771 } 1772 } 1773 *v++ = 0.0; 1774 finished:; 1775 } 1776 } 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #define CHUNKSIZE 10 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1783 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1784 { 1785 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1786 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1787 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1788 PetscErrorCode ierr; 1789 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1790 PetscTruth roworiented=a->roworiented; 1791 const PetscScalar *value = v; 1792 MatScalar *ap,*aa = a->a,*bap; 1793 1794 PetscFunctionBegin; 1795 if (roworiented) { 1796 stepval = (n-1)*bs; 1797 } else { 1798 stepval = (m-1)*bs; 1799 } 1800 for (k=0; k<m; k++) { /* loop over added rows */ 1801 row = im[k]; 1802 if (row < 0) continue; 1803 #if defined(PETSC_USE_DEBUG) 1804 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1805 #endif 1806 rp = aj + ai[row]; 1807 ap = aa + bs2*ai[row]; 1808 rmax = imax[row]; 1809 nrow = ailen[row]; 1810 low = 0; 1811 high = nrow; 1812 for (l=0; l<n; l++) { /* loop over added columns */ 1813 if (in[l] < 0) continue; 1814 #if defined(PETSC_USE_DEBUG) 1815 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1816 #endif 1817 col = in[l]; 1818 if (roworiented) { 1819 value = v + k*(stepval+bs)*bs + l*bs; 1820 } else { 1821 value = v + l*(stepval+bs)*bs + k*bs; 1822 } 1823 if (col <= lastcol) low = 0; else high = nrow; 1824 lastcol = col; 1825 while (high-low > 7) { 1826 t = (low+high)/2; 1827 if (rp[t] > col) high = t; 1828 else low = t; 1829 } 1830 for (i=low; i<high; i++) { 1831 if (rp[i] > col) break; 1832 if (rp[i] == col) { 1833 bap = ap + bs2*i; 1834 if (roworiented) { 1835 if (is == ADD_VALUES) { 1836 for (ii=0; ii<bs; ii++,value+=stepval) { 1837 for (jj=ii; jj<bs2; jj+=bs) { 1838 bap[jj] += *value++; 1839 } 1840 } 1841 } else { 1842 for (ii=0; ii<bs; ii++,value+=stepval) { 1843 for (jj=ii; jj<bs2; jj+=bs) { 1844 bap[jj] = *value++; 1845 } 1846 } 1847 } 1848 } else { 1849 if (is == ADD_VALUES) { 1850 for (ii=0; ii<bs; ii++,value+=stepval) { 1851 for (jj=0; jj<bs; jj++) { 1852 *bap++ += *value++; 1853 } 1854 } 1855 } else { 1856 for (ii=0; ii<bs; ii++,value+=stepval) { 1857 for (jj=0; jj<bs; jj++) { 1858 *bap++ = *value++; 1859 } 1860 } 1861 } 1862 } 1863 goto noinsert2; 1864 } 1865 } 1866 if (nonew == 1) goto noinsert2; 1867 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1868 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1869 N = nrow++ - 1; high++; 1870 /* shift up all the later entries in this row */ 1871 for (ii=N; ii>=i; ii--) { 1872 rp[ii+1] = rp[ii]; 1873 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1874 } 1875 if (N >= i) { 1876 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1877 } 1878 rp[i] = col; 1879 bap = ap + bs2*i; 1880 if (roworiented) { 1881 for (ii=0; ii<bs; ii++,value+=stepval) { 1882 for (jj=ii; jj<bs2; jj+=bs) { 1883 bap[jj] = *value++; 1884 } 1885 } 1886 } else { 1887 for (ii=0; ii<bs; ii++,value+=stepval) { 1888 for (jj=0; jj<bs; jj++) { 1889 *bap++ = *value++; 1890 } 1891 } 1892 } 1893 noinsert2:; 1894 low = i; 1895 } 1896 ailen[row] = nrow; 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1903 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1904 { 1905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1906 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1907 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1908 PetscErrorCode ierr; 1909 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1910 MatScalar *aa = a->a,*ap; 1911 PetscReal ratio=0.6; 1912 1913 PetscFunctionBegin; 1914 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1915 1916 if (m) rmax = ailen[0]; 1917 for (i=1; i<mbs; i++) { 1918 /* move each row back by the amount of empty slots (fshift) before it*/ 1919 fshift += imax[i-1] - ailen[i-1]; 1920 rmax = PetscMax(rmax,ailen[i]); 1921 if (fshift) { 1922 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1923 N = ailen[i]; 1924 for (j=0; j<N; j++) { 1925 ip[j-fshift] = ip[j]; 1926 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1927 } 1928 } 1929 ai[i] = ai[i-1] + ailen[i-1]; 1930 } 1931 if (mbs) { 1932 fshift += imax[mbs-1] - ailen[mbs-1]; 1933 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1934 } 1935 /* reset ilen and imax for each row */ 1936 for (i=0; i<mbs; i++) { 1937 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1938 } 1939 a->nz = ai[mbs]; 1940 1941 /* diagonals may have moved, so kill the diagonal pointers */ 1942 a->idiagvalid = PETSC_FALSE; 1943 if (fshift && a->diag) { 1944 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1945 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1946 a->diag = 0; 1947 } 1948 if (fshift && a->nounused == -1) { 1949 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1950 } 1951 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1953 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1954 a->reallocs = 0; 1955 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1956 1957 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1958 if (a->compressedrow.use){ 1959 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1960 } 1961 1962 A->same_nonzero = PETSC_TRUE; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 /* 1967 This function returns an array of flags which indicate the locations of contiguous 1968 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1969 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1970 Assume: sizes should be long enough to hold all the values. 1971 */ 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1974 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1975 { 1976 PetscInt i,j,k,row; 1977 PetscTruth flg; 1978 1979 PetscFunctionBegin; 1980 for (i=0,j=0; i<n; j++) { 1981 row = idx[i]; 1982 if (row%bs!=0) { /* Not the begining of a block */ 1983 sizes[j] = 1; 1984 i++; 1985 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1986 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1987 i++; 1988 } else { /* Begining of the block, so check if the complete block exists */ 1989 flg = PETSC_TRUE; 1990 for (k=1; k<bs; k++) { 1991 if (row+k != idx[i+k]) { /* break in the block */ 1992 flg = PETSC_FALSE; 1993 break; 1994 } 1995 } 1996 if (flg) { /* No break in the bs */ 1997 sizes[j] = bs; 1998 i+= bs; 1999 } else { 2000 sizes[j] = 1; 2001 i++; 2002 } 2003 } 2004 } 2005 *bs_max = j; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2011 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2012 { 2013 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,k,count,*rows; 2016 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2017 PetscScalar zero = 0.0; 2018 MatScalar *aa; 2019 2020 PetscFunctionBegin; 2021 /* Make a copy of the IS and sort it */ 2022 /* allocate memory for rows,sizes */ 2023 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2024 2025 /* copy IS values to rows, and sort them */ 2026 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2027 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2028 if (baij->keepnonzeropattern) { 2029 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2030 bs_max = is_n; 2031 A->same_nonzero = PETSC_TRUE; 2032 } else { 2033 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2034 A->same_nonzero = PETSC_FALSE; 2035 } 2036 2037 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2038 row = rows[j]; 2039 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2040 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2041 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2042 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2043 if (diag != 0.0) { 2044 if (baij->ilen[row/bs] > 0) { 2045 baij->ilen[row/bs] = 1; 2046 baij->j[baij->i[row/bs]] = row/bs; 2047 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2048 } 2049 /* Now insert all the diagonal values for this bs */ 2050 for (k=0; k<bs; k++) { 2051 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2052 } 2053 } else { /* (diag == 0.0) */ 2054 baij->ilen[row/bs] = 0; 2055 } /* end (diag == 0.0) */ 2056 } else { /* (sizes[i] != bs) */ 2057 #if defined (PETSC_USE_DEBUG) 2058 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2059 #endif 2060 for (k=0; k<count; k++) { 2061 aa[0] = zero; 2062 aa += bs; 2063 } 2064 if (diag != 0.0) { 2065 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2066 } 2067 } 2068 } 2069 2070 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2071 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2077 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2078 { 2079 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2080 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2081 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2082 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2083 PetscErrorCode ierr; 2084 PetscInt ridx,cidx,bs2=a->bs2; 2085 PetscTruth roworiented=a->roworiented; 2086 MatScalar *ap,value,*aa=a->a,*bap; 2087 2088 PetscFunctionBegin; 2089 if (v) PetscValidScalarPointer(v,6); 2090 for (k=0; k<m; k++) { /* loop over added rows */ 2091 row = im[k]; 2092 brow = row/bs; 2093 if (row < 0) continue; 2094 #if defined(PETSC_USE_DEBUG) 2095 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2096 #endif 2097 rp = aj + ai[brow]; 2098 ap = aa + bs2*ai[brow]; 2099 rmax = imax[brow]; 2100 nrow = ailen[brow]; 2101 low = 0; 2102 high = nrow; 2103 for (l=0; l<n; l++) { /* loop over added columns */ 2104 if (in[l] < 0) continue; 2105 #if defined(PETSC_USE_DEBUG) 2106 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2107 #endif 2108 col = in[l]; bcol = col/bs; 2109 ridx = row % bs; cidx = col % bs; 2110 if (roworiented) { 2111 value = v[l + k*n]; 2112 } else { 2113 value = v[k + l*m]; 2114 } 2115 if (col <= lastcol) low = 0; else high = nrow; 2116 lastcol = col; 2117 while (high-low > 7) { 2118 t = (low+high)/2; 2119 if (rp[t] > bcol) high = t; 2120 else low = t; 2121 } 2122 for (i=low; i<high; i++) { 2123 if (rp[i] > bcol) break; 2124 if (rp[i] == bcol) { 2125 bap = ap + bs2*i + bs*cidx + ridx; 2126 if (is == ADD_VALUES) *bap += value; 2127 else *bap = value; 2128 goto noinsert1; 2129 } 2130 } 2131 if (nonew == 1) goto noinsert1; 2132 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2133 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2134 N = nrow++ - 1; high++; 2135 /* shift up all the later entries in this row */ 2136 for (ii=N; ii>=i; ii--) { 2137 rp[ii+1] = rp[ii]; 2138 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2139 } 2140 if (N>=i) { 2141 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2142 } 2143 rp[i] = bcol; 2144 ap[bs2*i + bs*cidx + ridx] = value; 2145 a->nz++; 2146 noinsert1:; 2147 low = i; 2148 } 2149 ailen[brow] = nrow; 2150 } 2151 A->same_nonzero = PETSC_FALSE; 2152 PetscFunctionReturn(0); 2153 } 2154 2155 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2156 2157 #undef __FUNCT__ 2158 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2159 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2160 { 2161 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2162 Mat outA; 2163 PetscErrorCode ierr; 2164 PetscTruth row_identity,col_identity; 2165 2166 PetscFunctionBegin; 2167 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2168 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2169 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2170 if (!row_identity || !col_identity) { 2171 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2172 } 2173 2174 outA = inA; 2175 inA->factor = MAT_FACTOR_LU; 2176 2177 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2178 2179 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2180 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2181 a->row = row; 2182 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2183 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2184 a->col = col; 2185 2186 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2187 if (a->icol) { 2188 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2189 } 2190 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2191 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2192 2193 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2194 if (!a->solve_work) { 2195 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2196 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2197 } 2198 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2199 2200 PetscFunctionReturn(0); 2201 } 2202 2203 EXTERN_C_BEGIN 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2206 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2207 { 2208 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2209 PetscInt i,nz,mbs; 2210 2211 PetscFunctionBegin; 2212 nz = baij->maxnz/baij->bs2; 2213 mbs = baij->mbs; 2214 for (i=0; i<nz; i++) { 2215 baij->j[i] = indices[i]; 2216 } 2217 baij->nz = nz; 2218 for (i=0; i<mbs; i++) { 2219 baij->ilen[i] = baij->imax[i]; 2220 } 2221 PetscFunctionReturn(0); 2222 } 2223 EXTERN_C_END 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2227 /*@ 2228 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2229 in the matrix. 2230 2231 Input Parameters: 2232 + mat - the SeqBAIJ matrix 2233 - indices - the column indices 2234 2235 Level: advanced 2236 2237 Notes: 2238 This can be called if you have precomputed the nonzero structure of the 2239 matrix and want to provide it to the matrix object to improve the performance 2240 of the MatSetValues() operation. 2241 2242 You MUST have set the correct numbers of nonzeros per row in the call to 2243 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2244 2245 MUST be called before any calls to MatSetValues(); 2246 2247 @*/ 2248 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2249 { 2250 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2251 2252 PetscFunctionBegin; 2253 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2254 PetscValidPointer(indices,2); 2255 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2256 if (f) { 2257 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2258 } else { 2259 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2266 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2267 { 2268 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2269 PetscErrorCode ierr; 2270 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2271 PetscReal atmp; 2272 PetscScalar *x,zero = 0.0; 2273 MatScalar *aa; 2274 PetscInt ncols,brow,krow,kcol; 2275 2276 PetscFunctionBegin; 2277 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2278 bs = A->rmap->bs; 2279 aa = a->a; 2280 ai = a->i; 2281 aj = a->j; 2282 mbs = a->mbs; 2283 2284 ierr = VecSet(v,zero);CHKERRQ(ierr); 2285 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2286 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2287 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2288 for (i=0; i<mbs; i++) { 2289 ncols = ai[1] - ai[0]; ai++; 2290 brow = bs*i; 2291 for (j=0; j<ncols; j++){ 2292 for (kcol=0; kcol<bs; kcol++){ 2293 for (krow=0; krow<bs; krow++){ 2294 atmp = PetscAbsScalar(*aa);aa++; 2295 row = brow + krow; /* row index */ 2296 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2297 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2298 } 2299 } 2300 aj++; 2301 } 2302 } 2303 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatCopy_SeqBAIJ" 2309 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2310 { 2311 PetscErrorCode ierr; 2312 2313 PetscFunctionBegin; 2314 /* If the two matrices have the same copy implementation, use fast copy. */ 2315 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2317 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2318 2319 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2320 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2321 } 2322 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2323 } else { 2324 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2325 } 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2331 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2332 { 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2342 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2343 { 2344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2345 PetscFunctionBegin; 2346 *array = a->a; 2347 PetscFunctionReturn(0); 2348 } 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2352 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2353 { 2354 PetscFunctionBegin; 2355 PetscFunctionReturn(0); 2356 } 2357 2358 #include "petscblaslapack.h" 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2361 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2362 { 2363 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2364 PetscErrorCode ierr; 2365 PetscInt i,bs=Y->rmap->bs,j,bs2; 2366 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2367 2368 PetscFunctionBegin; 2369 if (str == SAME_NONZERO_PATTERN) { 2370 PetscScalar alpha = a; 2371 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2372 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2373 if (y->xtoy && y->XtoY != X) { 2374 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2375 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2376 } 2377 if (!y->xtoy) { /* get xtoy */ 2378 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2379 y->XtoY = X; 2380 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2381 } 2382 bs2 = bs*bs; 2383 for (i=0; i<x->nz; i++) { 2384 j = 0; 2385 while (j < bs2){ 2386 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2387 j++; 2388 } 2389 } 2390 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2391 } else { 2392 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2393 } 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2399 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2400 { 2401 PetscInt rbs,cbs; 2402 PetscErrorCode ierr; 2403 2404 PetscFunctionBegin; 2405 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2406 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2407 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2408 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2414 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2415 { 2416 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2417 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2418 MatScalar *aa = a->a; 2419 2420 PetscFunctionBegin; 2421 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2427 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2428 { 2429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2430 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2431 MatScalar *aa = a->a; 2432 2433 PetscFunctionBegin; 2434 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2442 /* 2443 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2444 */ 2445 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2446 { 2447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2450 PetscInt nz = a->i[m],row,*jj,mr,col; 2451 2452 PetscFunctionBegin; 2453 *nn = n; 2454 if (!ia) PetscFunctionReturn(0); 2455 if (symmetric) { 2456 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2457 } else { 2458 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2459 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2460 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2461 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2462 jj = a->j; 2463 for (i=0; i<nz; i++) { 2464 collengths[jj[i]]++; 2465 } 2466 cia[0] = oshift; 2467 for (i=0; i<n; i++) { 2468 cia[i+1] = cia[i] + collengths[i]; 2469 } 2470 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2471 jj = a->j; 2472 for (row=0; row<m; row++) { 2473 mr = a->i[row+1] - a->i[row]; 2474 for (i=0; i<mr; i++) { 2475 col = *jj++; 2476 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2477 } 2478 } 2479 ierr = PetscFree(collengths);CHKERRQ(ierr); 2480 *ia = cia; *ja = cja; 2481 } 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2487 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2488 { 2489 PetscErrorCode ierr; 2490 2491 PetscFunctionBegin; 2492 if (!ia) PetscFunctionReturn(0); 2493 ierr = PetscFree(*ia);CHKERRQ(ierr); 2494 ierr = PetscFree(*ja);CHKERRQ(ierr); 2495 PetscFunctionReturn(0); 2496 } 2497 2498 #undef __FUNCT__ 2499 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2500 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2501 { 2502 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2503 PetscErrorCode ierr; 2504 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2505 PetscScalar dx,*y,*xx,*w3_array; 2506 PetscScalar *vscale_array; 2507 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2508 Vec w1=coloring->w1,w2=coloring->w2,w3; 2509 void *fctx = coloring->fctx; 2510 PetscTruth flg = PETSC_FALSE; 2511 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2512 Vec x1_tmp; 2513 2514 PetscFunctionBegin; 2515 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2516 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2517 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2518 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2519 2520 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2521 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2522 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2523 if (flg) { 2524 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2525 } else { 2526 PetscTruth assembled; 2527 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2528 if (assembled) { 2529 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2530 } 2531 } 2532 2533 x1_tmp = x1; 2534 if (!coloring->vscale){ 2535 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2536 } 2537 2538 /* 2539 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2540 coloring->F for the coarser grids from the finest 2541 */ 2542 if (coloring->F) { 2543 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2544 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2545 if (m1 != m2) { 2546 coloring->F = 0; 2547 } 2548 } 2549 2550 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2551 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2552 } 2553 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2554 2555 /* Set w1 = F(x1) */ 2556 if (coloring->F) { 2557 w1 = coloring->F; /* use already computed value of function */ 2558 coloring->F = 0; 2559 } else { 2560 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2561 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2562 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2563 } 2564 2565 if (!coloring->w3) { 2566 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2567 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2568 } 2569 w3 = coloring->w3; 2570 2571 CHKMEMQ; 2572 /* Compute all the local scale factors, including ghost points */ 2573 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2574 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2575 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2576 if (ctype == IS_COLORING_GHOSTED){ 2577 col_start = 0; col_end = N; 2578 } else if (ctype == IS_COLORING_GLOBAL){ 2579 xx = xx - start; 2580 vscale_array = vscale_array - start; 2581 col_start = start; col_end = N + start; 2582 } CHKMEMQ; 2583 for (col=col_start; col<col_end; col++){ 2584 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2585 if (coloring->htype[0] == 'w') { 2586 dx = 1.0 + unorm; 2587 } else { 2588 dx = xx[col]; 2589 } 2590 if (dx == 0.0) dx = 1.0; 2591 #if !defined(PETSC_USE_COMPLEX) 2592 if (dx < umin && dx >= 0.0) dx = umin; 2593 else if (dx < 0.0 && dx > -umin) dx = -umin; 2594 #else 2595 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2596 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2597 #endif 2598 dx *= epsilon; 2599 vscale_array[col] = 1.0/dx; 2600 } CHKMEMQ; 2601 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2602 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2603 if (ctype == IS_COLORING_GLOBAL){ 2604 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2605 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2606 } 2607 CHKMEMQ; 2608 if (coloring->vscaleforrow) { 2609 vscaleforrow = coloring->vscaleforrow; 2610 } else { 2611 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2612 } 2613 2614 2615 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2616 /* 2617 Loop over each color 2618 */ 2619 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2620 for (k=0; k<coloring->ncolors; k++) { 2621 coloring->currentcolor = k; 2622 for (i=0; i<bs; i++) { 2623 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2624 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2625 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2626 /* 2627 Loop over each column associated with color 2628 adding the perturbation to the vector w3. 2629 */ 2630 for (l=0; l<coloring->ncolumns[k]; l++) { 2631 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2632 if (coloring->htype[0] == 'w') { 2633 dx = 1.0 + unorm; 2634 } else { 2635 dx = xx[col]; 2636 } 2637 if (dx == 0.0) dx = 1.0; 2638 #if !defined(PETSC_USE_COMPLEX) 2639 if (dx < umin && dx >= 0.0) dx = umin; 2640 else if (dx < 0.0 && dx > -umin) dx = -umin; 2641 #else 2642 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2643 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2644 #endif 2645 dx *= epsilon; 2646 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2647 w3_array[col] += dx; 2648 } 2649 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2650 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2651 2652 /* 2653 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2654 w2 = F(x1 + dx) - F(x1) 2655 */ 2656 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2657 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2658 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2659 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2660 2661 /* 2662 Loop over rows of vector, putting results into Jacobian matrix 2663 */ 2664 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2665 for (l=0; l<coloring->nrows[k]; l++) { 2666 row = bs*coloring->rows[k][l]; /* local row index */ 2667 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2668 for (j=0; j<bs; j++) { 2669 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2670 srows[j] = row + start + j; 2671 } 2672 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2673 } 2674 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2675 } 2676 } /* endof for each color */ 2677 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2678 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2679 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2680 ierr = PetscFree(srows);CHKERRQ(ierr); 2681 2682 coloring->currentcolor = -1; 2683 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2684 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2685 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2686 PetscFunctionReturn(0); 2687 } 2688 2689 /* -------------------------------------------------------------------*/ 2690 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2691 MatGetRow_SeqBAIJ, 2692 MatRestoreRow_SeqBAIJ, 2693 MatMult_SeqBAIJ_N, 2694 /* 4*/ MatMultAdd_SeqBAIJ_N, 2695 MatMultTranspose_SeqBAIJ, 2696 MatMultTransposeAdd_SeqBAIJ, 2697 0, 2698 0, 2699 0, 2700 /*10*/ 0, 2701 MatLUFactor_SeqBAIJ, 2702 0, 2703 0, 2704 MatTranspose_SeqBAIJ, 2705 /*15*/ MatGetInfo_SeqBAIJ, 2706 MatEqual_SeqBAIJ, 2707 MatGetDiagonal_SeqBAIJ, 2708 MatDiagonalScale_SeqBAIJ, 2709 MatNorm_SeqBAIJ, 2710 /*20*/ 0, 2711 MatAssemblyEnd_SeqBAIJ, 2712 MatSetOption_SeqBAIJ, 2713 MatZeroEntries_SeqBAIJ, 2714 /*24*/ MatZeroRows_SeqBAIJ, 2715 0, 2716 0, 2717 0, 2718 0, 2719 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2720 0, 2721 0, 2722 MatGetArray_SeqBAIJ, 2723 MatRestoreArray_SeqBAIJ, 2724 /*34*/ MatDuplicate_SeqBAIJ, 2725 0, 2726 0, 2727 MatILUFactor_SeqBAIJ, 2728 0, 2729 /*39*/ MatAXPY_SeqBAIJ, 2730 MatGetSubMatrices_SeqBAIJ, 2731 MatIncreaseOverlap_SeqBAIJ, 2732 MatGetValues_SeqBAIJ, 2733 MatCopy_SeqBAIJ, 2734 /*44*/ 0, 2735 MatScale_SeqBAIJ, 2736 0, 2737 0, 2738 MatILUDTFactor_SeqBAIJ, 2739 /*49*/ MatSetBlockSize_SeqBAIJ, 2740 MatGetRowIJ_SeqBAIJ, 2741 MatRestoreRowIJ_SeqBAIJ, 2742 MatGetColumnIJ_SeqBAIJ, 2743 MatRestoreColumnIJ_SeqBAIJ, 2744 /*54*/ MatFDColoringCreate_SeqAIJ, 2745 0, 2746 0, 2747 0, 2748 MatSetValuesBlocked_SeqBAIJ, 2749 /*59*/ MatGetSubMatrix_SeqBAIJ, 2750 MatDestroy_SeqBAIJ, 2751 MatView_SeqBAIJ, 2752 0, 2753 0, 2754 /*64*/ 0, 2755 0, 2756 0, 2757 0, 2758 0, 2759 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2760 0, 2761 MatConvert_Basic, 2762 0, 2763 0, 2764 /*74*/ 0, 2765 MatFDColoringApply_BAIJ, 2766 0, 2767 0, 2768 0, 2769 /*79*/ 0, 2770 0, 2771 0, 2772 0, 2773 MatLoad_SeqBAIJ, 2774 /*84*/ 0, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*89*/ 0, 2780 0, 2781 0, 2782 0, 2783 0, 2784 /*94*/ 0, 2785 0, 2786 0, 2787 0, 2788 0, 2789 /*99*/0, 2790 0, 2791 0, 2792 0, 2793 0, 2794 /*104*/0, 2795 MatRealPart_SeqBAIJ, 2796 MatImaginaryPart_SeqBAIJ, 2797 0, 2798 0, 2799 /*109*/0, 2800 0, 2801 0, 2802 0, 2803 MatMissingDiagonal_SeqBAIJ, 2804 /*114*/0, 2805 0, 2806 0, 2807 0, 2808 0, 2809 /*119*/0, 2810 0, 2811 MatMultHermitianTranspose_SeqBAIJ, 2812 MatMultHermitianTransposeAdd_SeqBAIJ 2813 }; 2814 2815 EXTERN_C_BEGIN 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2818 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2819 { 2820 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2821 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2822 PetscErrorCode ierr; 2823 2824 PetscFunctionBegin; 2825 if (aij->nonew != 1) { 2826 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2827 } 2828 2829 /* allocate space for values if not already there */ 2830 if (!aij->saved_values) { 2831 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2832 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2833 } 2834 2835 /* copy values over */ 2836 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2837 PetscFunctionReturn(0); 2838 } 2839 EXTERN_C_END 2840 2841 EXTERN_C_BEGIN 2842 #undef __FUNCT__ 2843 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2844 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2845 { 2846 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2847 PetscErrorCode ierr; 2848 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2849 2850 PetscFunctionBegin; 2851 if (aij->nonew != 1) { 2852 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2853 } 2854 if (!aij->saved_values) { 2855 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2856 } 2857 2858 /* copy values over */ 2859 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2860 PetscFunctionReturn(0); 2861 } 2862 EXTERN_C_END 2863 2864 EXTERN_C_BEGIN 2865 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2866 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2867 EXTERN_C_END 2868 2869 EXTERN_C_BEGIN 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2872 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2873 { 2874 Mat_SeqBAIJ *b; 2875 PetscErrorCode ierr; 2876 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2877 PetscTruth flg,skipallocation = PETSC_FALSE; 2878 2879 PetscFunctionBegin; 2880 2881 if (nz == MAT_SKIP_ALLOCATION) { 2882 skipallocation = PETSC_TRUE; 2883 nz = 0; 2884 } 2885 2886 if (bs < 0) { 2887 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2888 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2889 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2890 bs = PetscAbs(bs); 2891 } 2892 if (nnz && newbs != bs) { 2893 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2894 } 2895 bs = newbs; 2896 2897 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2898 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2899 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2900 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2901 2902 B->preallocated = PETSC_TRUE; 2903 2904 mbs = B->rmap->n/bs; 2905 nbs = B->cmap->n/bs; 2906 bs2 = bs*bs; 2907 2908 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2909 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2910 } 2911 2912 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2913 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2914 if (nnz) { 2915 for (i=0; i<mbs; i++) { 2916 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2917 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2918 } 2919 } 2920 2921 b = (Mat_SeqBAIJ*)B->data; 2922 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2923 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2924 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2925 2926 if (!flg) { 2927 switch (bs) { 2928 case 1: 2929 B->ops->mult = MatMult_SeqBAIJ_1; 2930 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2931 B->ops->sor = MatSOR_SeqBAIJ_1; 2932 break; 2933 case 2: 2934 B->ops->mult = MatMult_SeqBAIJ_2; 2935 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2936 B->ops->sor = MatSOR_SeqBAIJ_2; 2937 break; 2938 case 3: 2939 B->ops->mult = MatMult_SeqBAIJ_3; 2940 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2941 B->ops->sor = MatSOR_SeqBAIJ_3; 2942 break; 2943 case 4: 2944 B->ops->mult = MatMult_SeqBAIJ_4; 2945 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2946 B->ops->sor = MatSOR_SeqBAIJ_4; 2947 break; 2948 case 5: 2949 B->ops->mult = MatMult_SeqBAIJ_5; 2950 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2951 B->ops->sor = MatSOR_SeqBAIJ_5; 2952 break; 2953 case 6: 2954 B->ops->mult = MatMult_SeqBAIJ_6; 2955 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2956 B->ops->sor = MatSOR_SeqBAIJ_6; 2957 break; 2958 case 7: 2959 B->ops->mult = MatMult_SeqBAIJ_7; 2960 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2961 B->ops->sor = MatSOR_SeqBAIJ_7; 2962 break; 2963 default: 2964 B->ops->mult = MatMult_SeqBAIJ_N; 2965 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2966 break; 2967 } 2968 } 2969 B->rmap->bs = bs; 2970 b->mbs = mbs; 2971 b->nbs = nbs; 2972 if (!skipallocation) { 2973 if (!b->imax) { 2974 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2975 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2976 b->free_imax_ilen = PETSC_TRUE; 2977 } 2978 /* b->ilen will count nonzeros in each block row so far. */ 2979 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2980 if (!nnz) { 2981 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2982 else if (nz <= 0) nz = 1; 2983 for (i=0; i<mbs; i++) b->imax[i] = nz; 2984 nz = nz*mbs; 2985 } else { 2986 nz = 0; 2987 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2988 } 2989 2990 /* allocate the matrix space */ 2991 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2992 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2993 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2994 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2995 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2996 b->singlemalloc = PETSC_TRUE; 2997 b->i[0] = 0; 2998 for (i=1; i<mbs+1; i++) { 2999 b->i[i] = b->i[i-1] + b->imax[i-1]; 3000 } 3001 b->free_a = PETSC_TRUE; 3002 b->free_ij = PETSC_TRUE; 3003 } else { 3004 b->free_a = PETSC_FALSE; 3005 b->free_ij = PETSC_FALSE; 3006 } 3007 3008 B->rmap->bs = bs; 3009 b->bs2 = bs2; 3010 b->mbs = mbs; 3011 b->nz = 0; 3012 b->maxnz = nz*bs2; 3013 B->info.nz_unneeded = (PetscReal)b->maxnz; 3014 PetscFunctionReturn(0); 3015 } 3016 EXTERN_C_END 3017 3018 EXTERN_C_BEGIN 3019 #undef __FUNCT__ 3020 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3021 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3022 { 3023 PetscInt i,m,nz,nz_max=0,*nnz; 3024 PetscScalar *values=0; 3025 PetscErrorCode ierr; 3026 3027 PetscFunctionBegin; 3028 3029 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3030 3031 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3032 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3033 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3034 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3035 m = B->rmap->n/bs; 3036 3037 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3038 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3039 for(i=0; i<m; i++) { 3040 nz = ii[i+1]- ii[i]; 3041 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3042 nz_max = PetscMax(nz_max, nz); 3043 nnz[i] = nz; 3044 } 3045 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3046 ierr = PetscFree(nnz);CHKERRQ(ierr); 3047 3048 values = (PetscScalar*)V; 3049 if (!values) { 3050 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3051 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3052 } 3053 for (i=0; i<m; i++) { 3054 PetscInt ncols = ii[i+1] - ii[i]; 3055 const PetscInt *icols = jj + ii[i]; 3056 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3057 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3058 } 3059 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3060 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3062 3063 PetscFunctionReturn(0); 3064 } 3065 EXTERN_C_END 3066 3067 3068 EXTERN_C_BEGIN 3069 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3070 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3071 EXTERN_C_END 3072 3073 /*MC 3074 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3075 block sparse compressed row format. 3076 3077 Options Database Keys: 3078 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3079 3080 Level: beginner 3081 3082 .seealso: MatCreateSeqBAIJ() 3083 M*/ 3084 3085 3086 EXTERN_C_BEGIN 3087 #undef __FUNCT__ 3088 #define __FUNCT__ "MatCreate_SeqBAIJ" 3089 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3090 { 3091 PetscErrorCode ierr; 3092 PetscMPIInt size; 3093 Mat_SeqBAIJ *b; 3094 3095 PetscFunctionBegin; 3096 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3097 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3098 3099 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3100 B->data = (void*)b; 3101 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3102 B->mapping = 0; 3103 b->row = 0; 3104 b->col = 0; 3105 b->icol = 0; 3106 b->reallocs = 0; 3107 b->saved_values = 0; 3108 3109 b->roworiented = PETSC_TRUE; 3110 b->nonew = 0; 3111 b->diag = 0; 3112 b->solve_work = 0; 3113 b->mult_work = 0; 3114 B->spptr = 0; 3115 B->info.nz_unneeded = (PetscReal)b->maxnz; 3116 b->keepnonzeropattern = PETSC_FALSE; 3117 b->xtoy = 0; 3118 b->XtoY = 0; 3119 b->compressedrow.use = PETSC_FALSE; 3120 b->compressedrow.nrows = 0; 3121 b->compressedrow.i = PETSC_NULL; 3122 b->compressedrow.rindex = PETSC_NULL; 3123 b->compressedrow.checked = PETSC_FALSE; 3124 B->same_nonzero = PETSC_FALSE; 3125 3126 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3127 "MatGetFactorAvailable_seqbaij_petsc", 3128 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3129 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3130 "MatGetFactor_seqbaij_petsc", 3131 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3132 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3133 "MatInvertBlockDiagonal_SeqBAIJ", 3134 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3135 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3136 "MatStoreValues_SeqBAIJ", 3137 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3138 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3139 "MatRetrieveValues_SeqBAIJ", 3140 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3141 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3142 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3143 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3144 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3145 "MatConvert_SeqBAIJ_SeqAIJ", 3146 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3147 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3148 "MatConvert_SeqBAIJ_SeqSBAIJ", 3149 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3150 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3151 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3152 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3153 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3154 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3155 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3156 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 EXTERN_C_END 3160 3161 #undef __FUNCT__ 3162 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3163 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3164 { 3165 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3166 PetscErrorCode ierr; 3167 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3168 3169 PetscFunctionBegin; 3170 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3171 3172 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3173 c->imax = a->imax; 3174 c->ilen = a->ilen; 3175 c->free_imax_ilen = PETSC_FALSE; 3176 } else { 3177 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3178 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3179 for (i=0; i<mbs; i++) { 3180 c->imax[i] = a->imax[i]; 3181 c->ilen[i] = a->ilen[i]; 3182 } 3183 c->free_imax_ilen = PETSC_TRUE; 3184 } 3185 3186 /* allocate the matrix space */ 3187 if (mallocmatspace){ 3188 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3189 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3190 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3191 c->singlemalloc = PETSC_FALSE; 3192 c->free_ij = PETSC_FALSE; 3193 c->i = a->i; 3194 c->j = a->j; 3195 c->parent = A; 3196 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3197 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3198 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3199 } else { 3200 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3201 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3202 c->singlemalloc = PETSC_TRUE; 3203 c->free_ij = PETSC_TRUE; 3204 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3205 if (mbs > 0) { 3206 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3207 if (cpvalues == MAT_COPY_VALUES) { 3208 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3209 } else { 3210 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3211 } 3212 } 3213 } 3214 } 3215 3216 c->roworiented = a->roworiented; 3217 c->nonew = a->nonew; 3218 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3219 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3220 c->bs2 = a->bs2; 3221 c->mbs = a->mbs; 3222 c->nbs = a->nbs; 3223 3224 if (a->diag) { 3225 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3226 c->diag = a->diag; 3227 c->free_diag = PETSC_FALSE; 3228 } else { 3229 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3230 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3231 for (i=0; i<mbs; i++) { 3232 c->diag[i] = a->diag[i]; 3233 } 3234 c->free_diag = PETSC_TRUE; 3235 } 3236 } else c->diag = 0; 3237 c->nz = a->nz; 3238 c->maxnz = a->maxnz; 3239 c->solve_work = 0; 3240 c->mult_work = 0; 3241 c->free_a = PETSC_TRUE; 3242 c->free_ij = PETSC_TRUE; 3243 C->preallocated = PETSC_TRUE; 3244 C->assembled = PETSC_TRUE; 3245 3246 c->compressedrow.use = a->compressedrow.use; 3247 c->compressedrow.nrows = a->compressedrow.nrows; 3248 c->compressedrow.checked = a->compressedrow.checked; 3249 if (a->compressedrow.checked && a->compressedrow.use){ 3250 i = a->compressedrow.nrows; 3251 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3252 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3253 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3254 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3255 } else { 3256 c->compressedrow.use = PETSC_FALSE; 3257 c->compressedrow.i = PETSC_NULL; 3258 c->compressedrow.rindex = PETSC_NULL; 3259 } 3260 C->same_nonzero = A->same_nonzero; 3261 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3267 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3268 { 3269 PetscErrorCode ierr; 3270 3271 PetscFunctionBegin; 3272 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3273 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3274 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3275 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3276 PetscFunctionReturn(0); 3277 } 3278 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "MatLoad_SeqBAIJ" 3281 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3282 { 3283 Mat_SeqBAIJ *a; 3284 Mat B; 3285 PetscErrorCode ierr; 3286 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3287 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3288 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3289 PetscInt *masked,nmask,tmp,bs2,ishift; 3290 PetscMPIInt size; 3291 int fd; 3292 PetscScalar *aa; 3293 MPI_Comm comm = ((PetscObject)viewer)->comm; 3294 3295 PetscFunctionBegin; 3296 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3297 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3298 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3299 bs2 = bs*bs; 3300 3301 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3302 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3303 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3304 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3305 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3306 M = header[1]; N = header[2]; nz = header[3]; 3307 3308 if (header[3] < 0) { 3309 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3310 } 3311 3312 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3313 3314 /* 3315 This code adds extra rows to make sure the number of rows is 3316 divisible by the blocksize 3317 */ 3318 mbs = M/bs; 3319 extra_rows = bs - M + bs*(mbs); 3320 if (extra_rows == bs) extra_rows = 0; 3321 else mbs++; 3322 if (extra_rows) { 3323 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3324 } 3325 3326 /* read in row lengths */ 3327 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3328 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3329 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3330 3331 /* read in column indices */ 3332 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3333 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3334 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3335 3336 /* loop over row lengths determining block row lengths */ 3337 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3338 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3339 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3340 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3341 rowcount = 0; 3342 nzcount = 0; 3343 for (i=0; i<mbs; i++) { 3344 nmask = 0; 3345 for (j=0; j<bs; j++) { 3346 kmax = rowlengths[rowcount]; 3347 for (k=0; k<kmax; k++) { 3348 tmp = jj[nzcount++]/bs; 3349 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3350 } 3351 rowcount++; 3352 } 3353 browlengths[i] += nmask; 3354 /* zero out the mask elements we set */ 3355 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3356 } 3357 3358 /* create our matrix */ 3359 ierr = MatCreate(comm,&B); 3360 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3361 ierr = MatSetType(B,type);CHKERRQ(ierr); 3362 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3363 a = (Mat_SeqBAIJ*)B->data; 3364 3365 /* set matrix "i" values */ 3366 a->i[0] = 0; 3367 for (i=1; i<= mbs; i++) { 3368 a->i[i] = a->i[i-1] + browlengths[i-1]; 3369 a->ilen[i-1] = browlengths[i-1]; 3370 } 3371 a->nz = 0; 3372 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3373 3374 /* read in nonzero values */ 3375 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3376 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3377 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3378 3379 /* set "a" and "j" values into matrix */ 3380 nzcount = 0; jcount = 0; 3381 for (i=0; i<mbs; i++) { 3382 nzcountb = nzcount; 3383 nmask = 0; 3384 for (j=0; j<bs; j++) { 3385 kmax = rowlengths[i*bs+j]; 3386 for (k=0; k<kmax; k++) { 3387 tmp = jj[nzcount++]/bs; 3388 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3389 } 3390 } 3391 /* sort the masked values */ 3392 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3393 3394 /* set "j" values into matrix */ 3395 maskcount = 1; 3396 for (j=0; j<nmask; j++) { 3397 a->j[jcount++] = masked[j]; 3398 mask[masked[j]] = maskcount++; 3399 } 3400 /* set "a" values into matrix */ 3401 ishift = bs2*a->i[i]; 3402 for (j=0; j<bs; j++) { 3403 kmax = rowlengths[i*bs+j]; 3404 for (k=0; k<kmax; k++) { 3405 tmp = jj[nzcountb]/bs ; 3406 block = mask[tmp] - 1; 3407 point = jj[nzcountb] - bs*tmp; 3408 idx = ishift + bs2*block + j + bs*point; 3409 a->a[idx] = (MatScalar)aa[nzcountb++]; 3410 } 3411 } 3412 /* zero out the mask elements we set */ 3413 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3414 } 3415 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3416 3417 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3418 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3419 ierr = PetscFree(aa);CHKERRQ(ierr); 3420 ierr = PetscFree(jj);CHKERRQ(ierr); 3421 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3422 3423 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3424 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3425 ierr = MatView_Private(B);CHKERRQ(ierr); 3426 3427 *A = B; 3428 PetscFunctionReturn(0); 3429 } 3430 3431 #undef __FUNCT__ 3432 #define __FUNCT__ "MatCreateSeqBAIJ" 3433 /*@C 3434 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3435 compressed row) format. For good matrix assembly performance the 3436 user should preallocate the matrix storage by setting the parameter nz 3437 (or the array nnz). By setting these parameters accurately, performance 3438 during matrix assembly can be increased by more than a factor of 50. 3439 3440 Collective on MPI_Comm 3441 3442 Input Parameters: 3443 + comm - MPI communicator, set to PETSC_COMM_SELF 3444 . bs - size of block 3445 . m - number of rows 3446 . n - number of columns 3447 . nz - number of nonzero blocks per block row (same for all rows) 3448 - nnz - array containing the number of nonzero blocks in the various block rows 3449 (possibly different for each block row) or PETSC_NULL 3450 3451 Output Parameter: 3452 . A - the matrix 3453 3454 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3455 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3456 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3457 3458 Options Database Keys: 3459 . -mat_no_unroll - uses code that does not unroll the loops in the 3460 block calculations (much slower) 3461 . -mat_block_size - size of the blocks to use 3462 3463 Level: intermediate 3464 3465 Notes: 3466 The number of rows and columns must be divisible by blocksize. 3467 3468 If the nnz parameter is given then the nz parameter is ignored 3469 3470 A nonzero block is any block that as 1 or more nonzeros in it 3471 3472 The block AIJ format is fully compatible with standard Fortran 77 3473 storage. That is, the stored row and column indices can begin at 3474 either one (as in Fortran) or zero. See the users' manual for details. 3475 3476 Specify the preallocated storage with either nz or nnz (not both). 3477 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3478 allocation. For additional details, see the users manual chapter on 3479 matrices. 3480 3481 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3482 @*/ 3483 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3484 { 3485 PetscErrorCode ierr; 3486 3487 PetscFunctionBegin; 3488 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3489 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3490 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3491 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3492 PetscFunctionReturn(0); 3493 } 3494 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3497 /*@C 3498 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3499 per row in the matrix. For good matrix assembly performance the 3500 user should preallocate the matrix storage by setting the parameter nz 3501 (or the array nnz). By setting these parameters accurately, performance 3502 during matrix assembly can be increased by more than a factor of 50. 3503 3504 Collective on MPI_Comm 3505 3506 Input Parameters: 3507 + A - the matrix 3508 . bs - size of block 3509 . nz - number of block nonzeros per block row (same for all rows) 3510 - nnz - array containing the number of block nonzeros in the various block rows 3511 (possibly different for each block row) or PETSC_NULL 3512 3513 Options Database Keys: 3514 . -mat_no_unroll - uses code that does not unroll the loops in the 3515 block calculations (much slower) 3516 . -mat_block_size - size of the blocks to use 3517 3518 Level: intermediate 3519 3520 Notes: 3521 If the nnz parameter is given then the nz parameter is ignored 3522 3523 You can call MatGetInfo() to get information on how effective the preallocation was; 3524 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3525 You can also run with the option -info and look for messages with the string 3526 malloc in them to see if additional memory allocation was needed. 3527 3528 The block AIJ format is fully compatible with standard Fortran 77 3529 storage. That is, the stored row and column indices can begin at 3530 either one (as in Fortran) or zero. See the users' manual for details. 3531 3532 Specify the preallocated storage with either nz or nnz (not both). 3533 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3534 allocation. For additional details, see the users manual chapter on 3535 matrices. 3536 3537 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3538 @*/ 3539 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3540 { 3541 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3542 3543 PetscFunctionBegin; 3544 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3545 if (f) { 3546 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3547 } 3548 PetscFunctionReturn(0); 3549 } 3550 3551 #undef __FUNCT__ 3552 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3553 /*@C 3554 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3555 (the default sequential PETSc format). 3556 3557 Collective on MPI_Comm 3558 3559 Input Parameters: 3560 + A - the matrix 3561 . i - the indices into j for the start of each local row (starts with zero) 3562 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3563 - v - optional values in the matrix 3564 3565 Level: developer 3566 3567 .keywords: matrix, aij, compressed row, sparse 3568 3569 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3570 @*/ 3571 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3572 { 3573 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3574 3575 PetscFunctionBegin; 3576 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3577 if (f) { 3578 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3579 } 3580 PetscFunctionReturn(0); 3581 } 3582 3583 3584 #undef __FUNCT__ 3585 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3586 /*@ 3587 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3588 (upper triangular entries in CSR format) provided by the user. 3589 3590 Collective on MPI_Comm 3591 3592 Input Parameters: 3593 + comm - must be an MPI communicator of size 1 3594 . bs - size of block 3595 . m - number of rows 3596 . n - number of columns 3597 . i - row indices 3598 . j - column indices 3599 - a - matrix values 3600 3601 Output Parameter: 3602 . mat - the matrix 3603 3604 Level: intermediate 3605 3606 Notes: 3607 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3608 once the matrix is destroyed 3609 3610 You cannot set new nonzero locations into this matrix, that will generate an error. 3611 3612 The i and j indices are 0 based 3613 3614 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3615 3616 @*/ 3617 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3618 { 3619 PetscErrorCode ierr; 3620 PetscInt ii; 3621 Mat_SeqBAIJ *baij; 3622 3623 PetscFunctionBegin; 3624 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3625 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3626 3627 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3628 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3629 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3630 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3631 baij = (Mat_SeqBAIJ*)(*mat)->data; 3632 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3633 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3634 3635 baij->i = i; 3636 baij->j = j; 3637 baij->a = a; 3638 baij->singlemalloc = PETSC_FALSE; 3639 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3640 baij->free_a = PETSC_FALSE; 3641 baij->free_ij = PETSC_FALSE; 3642 3643 for (ii=0; ii<m; ii++) { 3644 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3645 #if defined(PETSC_USE_DEBUG) 3646 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3647 #endif 3648 } 3649 #if defined(PETSC_USE_DEBUG) 3650 for (ii=0; ii<baij->i[m]; ii++) { 3651 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3652 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3653 } 3654 #endif 3655 3656 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3658 PetscFunctionReturn(0); 3659 } 3660