1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 1: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 1*diag_offset[i]; 68 diag[0] = odiag[0]; 69 mdiag[0] = odiag[0]; 70 diag[0] = 1.0 / (diag[0] + shift); 71 diag += 1; 72 mdiag += 1; 73 } 74 break; 75 case 2: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 4*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 81 diag += 4; 82 mdiag += 4; 83 } 84 break; 85 case 3: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 9*diag_offset[i]; 88 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 89 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 90 diag[8] = odiag[8]; 91 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 92 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 93 mdiag[8] = odiag[8]; 94 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 95 diag += 9; 96 mdiag += 9; 97 } 98 break; 99 case 4: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 16*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 105 diag += 16; 106 mdiag += 16; 107 } 108 break; 109 case 5: 110 for (i=0; i<mbs; i++) { 111 odiag = v + 25*diag_offset[i]; 112 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 115 diag += 25; 116 mdiag += 25; 117 } 118 break; 119 case 6: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 36*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 125 diag += 36; 126 mdiag += 36; 127 } 128 break; 129 default: 130 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 EXTERN_C_END 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 139 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 140 { 141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 142 PetscScalar *x,x1,s1; 143 const PetscScalar *b; 144 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 145 PetscErrorCode ierr; 146 PetscInt m = a->mbs,i,i2,nz,j; 147 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 148 149 PetscFunctionBegin; 150 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 151 its = its*lits; 152 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 153 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 154 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 155 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 156 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 157 158 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 159 160 diag = a->diag; 161 idiag = a->idiag; 162 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 163 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 164 165 if (flag & SOR_ZERO_INITIAL_GUESS) { 166 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 167 x[0] = b[0]*idiag[0]; 168 i2 = 1; 169 idiag += 1; 170 for (i=1; i<m; i++) { 171 v = aa + ai[i]; 172 vi = aj + ai[i]; 173 nz = diag[i] - ai[i]; 174 s1 = b[i2]; 175 for (j=0; j<nz; j++) { 176 s1 -= v[j]*x[vi[j]]; 177 } 178 x[i2] = idiag[0]*s1; 179 idiag += 1; 180 i2 += 1; 181 } 182 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 183 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 184 } 185 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 186 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 187 i2 = 0; 188 mdiag = a->idiag+a->mbs; 189 for (i=0; i<m; i++) { 190 x1 = x[i2]; 191 x[i2] = mdiag[0]*x1; 192 mdiag += 1; 193 i2 += 1; 194 } 195 ierr = PetscLogFlops(m);CHKERRQ(ierr); 196 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 197 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 198 } 199 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 200 idiag = a->idiag+a->mbs - 1; 201 i2 = m - 1; 202 x1 = x[i2]; 203 x[i2] = idiag[0]*x1; 204 idiag -= 1; 205 i2 -= 1; 206 for (i=m-2; i>=0; i--) { 207 v = aa + (diag[i]+1); 208 vi = aj + diag[i] + 1; 209 nz = ai[i+1] - diag[i] - 1; 210 s1 = x[i2]; 211 for (j=0; j<nz; j++) { 212 s1 -= v[j]*x[vi[j]]; 213 } 214 x[i2] = idiag[0]*s1; 215 idiag -= 1; 216 i2 -= 1; 217 } 218 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 219 } 220 } else { 221 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 222 } 223 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 224 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 #undef __FUNCT__ 229 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 230 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 231 { 232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 233 PetscScalar *x,x1,x2,s1,s2; 234 const PetscScalar *b; 235 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 236 PetscErrorCode ierr; 237 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 238 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 239 240 PetscFunctionBegin; 241 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 242 its = its*lits; 243 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 244 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 245 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 246 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 247 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 248 249 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 250 251 diag = a->diag; 252 idiag = a->idiag; 253 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 254 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 255 256 if (flag & SOR_ZERO_INITIAL_GUESS) { 257 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 258 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 259 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 260 i2 = 2; 261 idiag += 4; 262 for (i=1; i<m; i++) { 263 v = aa + 4*ai[i]; 264 vi = aj + ai[i]; 265 nz = diag[i] - ai[i]; 266 s1 = b[i2]; s2 = b[i2+1]; 267 for (j=0; j<nz; j++) { 268 idx = 2*vi[j]; 269 it = 4*j; 270 x1 = x[idx]; x2 = x[1+idx]; 271 s1 -= v[it]*x1 + v[it+2]*x2; 272 s2 -= v[it+1]*x1 + v[it+3]*x2; 273 } 274 x[i2] = idiag[0]*s1 + idiag[2]*s2; 275 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 276 idiag += 4; 277 i2 += 2; 278 } 279 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 280 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 281 } 282 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 283 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 284 i2 = 0; 285 mdiag = a->idiag+4*a->mbs; 286 for (i=0; i<m; i++) { 287 x1 = x[i2]; x2 = x[i2+1]; 288 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 289 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 290 mdiag += 4; 291 i2 += 2; 292 } 293 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 294 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 295 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 296 } 297 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 298 idiag = a->idiag+4*a->mbs - 4; 299 i2 = 2*m - 2; 300 x1 = x[i2]; x2 = x[i2+1]; 301 x[i2] = idiag[0]*x1 + idiag[2]*x2; 302 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 303 idiag -= 4; 304 i2 -= 2; 305 for (i=m-2; i>=0; i--) { 306 v = aa + 4*(diag[i]+1); 307 vi = aj + diag[i] + 1; 308 nz = ai[i+1] - diag[i] - 1; 309 s1 = x[i2]; s2 = x[i2+1]; 310 for (j=0; j<nz; j++) { 311 idx = 2*vi[j]; 312 it = 4*j; 313 x1 = x[idx]; x2 = x[1+idx]; 314 s1 -= v[it]*x1 + v[it+2]*x2; 315 s2 -= v[it+1]*x1 + v[it+3]*x2; 316 } 317 x[i2] = idiag[0]*s1 + idiag[2]*s2; 318 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 319 idiag -= 4; 320 i2 -= 2; 321 } 322 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 323 } 324 } else { 325 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 326 } 327 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 328 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 329 PetscFunctionReturn(0); 330 } 331 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 334 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 335 { 336 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 337 PetscScalar *x,x1,x2,x3,s1,s2,s3; 338 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 339 const PetscScalar *b; 340 PetscErrorCode ierr; 341 PetscInt m = a->mbs,i,i2,nz,idx; 342 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 343 344 PetscFunctionBegin; 345 its = its*lits; 346 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 347 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 348 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 349 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 350 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 351 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 352 353 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 354 355 diag = a->diag; 356 idiag = a->idiag; 357 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 358 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 359 360 if (flag & SOR_ZERO_INITIAL_GUESS) { 361 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 362 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 363 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 364 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 365 i2 = 3; 366 idiag += 9; 367 for (i=1; i<m; i++) { 368 v = aa + 9*ai[i]; 369 vi = aj + ai[i]; 370 nz = diag[i] - ai[i]; 371 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 372 while (nz--) { 373 idx = 3*(*vi++); 374 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 375 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 376 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 377 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 378 v += 9; 379 } 380 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 381 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 382 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 383 idiag += 9; 384 i2 += 3; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 388 } 389 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 390 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 391 i2 = 0; 392 mdiag = a->idiag+9*a->mbs; 393 for (i=0; i<m; i++) { 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 396 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 397 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 398 mdiag += 9; 399 i2 += 3; 400 } 401 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 402 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 403 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 404 } 405 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 406 idiag = a->idiag+9*a->mbs - 9; 407 i2 = 3*m - 3; 408 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 409 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 410 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 411 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 412 idiag -= 9; 413 i2 -= 3; 414 for (i=m-2; i>=0; i--) { 415 v = aa + 9*(diag[i]+1); 416 vi = aj + diag[i] + 1; 417 nz = ai[i+1] - diag[i] - 1; 418 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 419 while (nz--) { 420 idx = 3*(*vi++); 421 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 422 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 423 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 424 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 425 v += 9; 426 } 427 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 428 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 429 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 430 idiag -= 9; 431 i2 -= 3; 432 } 433 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 445 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 449 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 450 const PetscScalar *b; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 457 its = its*lits; 458 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 459 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 460 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 461 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 462 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 463 464 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 465 466 diag = a->diag; 467 idiag = a->idiag; 468 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 469 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 470 471 if (flag & SOR_ZERO_INITIAL_GUESS) { 472 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 473 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 474 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 475 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 476 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 477 i2 = 4; 478 idiag += 16; 479 for (i=1; i<m; i++) { 480 v = aa + 16*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 484 while (nz--) { 485 idx = 4*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 487 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 488 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 489 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 490 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 491 v += 16; 492 } 493 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 494 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 495 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 496 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 497 idiag += 16; 498 i2 += 4; 499 } 500 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 501 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 502 } 503 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 504 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 505 i2 = 0; 506 mdiag = a->idiag+16*a->mbs; 507 for (i=0; i<m; i++) { 508 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 509 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 510 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 511 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 512 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 513 mdiag += 16; 514 i2 += 4; 515 } 516 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 517 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 518 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 519 } 520 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 521 idiag = a->idiag+16*a->mbs - 16; 522 i2 = 4*m - 4; 523 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 524 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 525 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 526 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 527 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 528 idiag -= 16; 529 i2 -= 4; 530 for (i=m-2; i>=0; i--) { 531 v = aa + 16*(diag[i]+1); 532 vi = aj + diag[i] + 1; 533 nz = ai[i+1] - diag[i] - 1; 534 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 535 while (nz--) { 536 idx = 4*(*vi++); 537 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 538 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 539 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 540 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 541 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 542 v += 16; 543 } 544 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 545 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 546 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 547 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 548 idiag -= 16; 549 i2 -= 4; 550 } 551 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 552 } 553 } else { 554 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 555 } 556 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 557 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 563 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 564 { 565 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 566 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 567 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 568 const PetscScalar *b; 569 PetscErrorCode ierr; 570 PetscInt m = a->mbs,i,i2,nz,idx; 571 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 572 573 PetscFunctionBegin; 574 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 575 its = its*lits; 576 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 577 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 578 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 579 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 580 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 581 582 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 583 584 diag = a->diag; 585 idiag = a->idiag; 586 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 587 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 588 589 if (flag & SOR_ZERO_INITIAL_GUESS) { 590 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 591 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 592 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 593 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 594 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 595 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 596 i2 = 5; 597 idiag += 25; 598 for (i=1; i<m; i++) { 599 v = aa + 25*ai[i]; 600 vi = aj + ai[i]; 601 nz = diag[i] - ai[i]; 602 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 603 while (nz--) { 604 idx = 5*(*vi++); 605 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 606 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 607 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 608 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 609 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 610 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 611 v += 25; 612 } 613 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 614 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 615 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 616 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 617 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 618 idiag += 25; 619 i2 += 5; 620 } 621 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 622 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 623 } 624 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 625 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 626 i2 = 0; 627 mdiag = a->idiag+25*a->mbs; 628 for (i=0; i<m; i++) { 629 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 630 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 631 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 632 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 633 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 634 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 635 mdiag += 25; 636 i2 += 5; 637 } 638 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 639 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 640 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 641 } 642 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 643 idiag = a->idiag+25*a->mbs - 25; 644 i2 = 5*m - 5; 645 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 646 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 647 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 648 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 649 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 650 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 651 idiag -= 25; 652 i2 -= 5; 653 for (i=m-2; i>=0; i--) { 654 v = aa + 25*(diag[i]+1); 655 vi = aj + diag[i] + 1; 656 nz = ai[i+1] - diag[i] - 1; 657 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 658 while (nz--) { 659 idx = 5*(*vi++); 660 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 661 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 662 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 663 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 664 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 665 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 666 v += 25; 667 } 668 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 669 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 670 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 671 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 672 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 673 idiag -= 25; 674 i2 -= 5; 675 } 676 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 677 } 678 } else { 679 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 680 } 681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 682 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 688 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 692 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 693 const PetscScalar *b; 694 PetscErrorCode ierr; 695 PetscInt m = a->mbs,i,i2,nz,idx; 696 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 697 698 PetscFunctionBegin; 699 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 700 its = its*lits; 701 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 702 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 703 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 704 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 705 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 706 707 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 708 709 diag = a->diag; 710 idiag = a->idiag; 711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 712 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 713 714 if (flag & SOR_ZERO_INITIAL_GUESS) { 715 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 716 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 717 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 718 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 719 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 720 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 721 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 722 i2 = 6; 723 idiag += 36; 724 for (i=1; i<m; i++) { 725 v = aa + 36*ai[i]; 726 vi = aj + ai[i]; 727 nz = diag[i] - ai[i]; 728 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 729 while (nz--) { 730 idx = 6*(*vi++); 731 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 732 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 733 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 734 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 735 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 736 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 737 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 738 v += 36; 739 } 740 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 741 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 742 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 743 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 744 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 745 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 746 idiag += 36; 747 i2 += 6; 748 } 749 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 750 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 751 } 752 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 753 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 754 i2 = 0; 755 mdiag = a->idiag+36*a->mbs; 756 for (i=0; i<m; i++) { 757 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 758 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 759 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 760 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 761 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 762 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 763 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 764 mdiag += 36; 765 i2 += 6; 766 } 767 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 768 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 769 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 770 } 771 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 772 idiag = a->idiag+36*a->mbs - 36; 773 i2 = 6*m - 6; 774 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 775 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 776 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 777 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 778 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 779 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 780 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 781 idiag -= 36; 782 i2 -= 6; 783 for (i=m-2; i>=0; i--) { 784 v = aa + 36*(diag[i]+1); 785 vi = aj + diag[i] + 1; 786 nz = ai[i+1] - diag[i] - 1; 787 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 788 while (nz--) { 789 idx = 6*(*vi++); 790 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 791 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 792 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 793 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 794 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 795 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 796 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 797 v += 36; 798 } 799 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 800 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 801 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 802 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 803 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 804 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 805 idiag -= 36; 806 i2 -= 6; 807 } 808 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 809 } 810 } else { 811 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 812 } 813 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 814 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 815 PetscFunctionReturn(0); 816 } 817 818 #undef __FUNCT__ 819 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 820 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 821 { 822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 823 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 824 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 825 const PetscScalar *b; 826 PetscErrorCode ierr; 827 PetscInt m = a->mbs,i,i2,nz,idx; 828 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 829 830 PetscFunctionBegin; 831 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 832 its = its*lits; 833 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 834 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 835 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 836 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 837 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 838 839 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 840 841 diag = a->diag; 842 idiag = a->idiag; 843 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 844 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 845 846 if (flag & SOR_ZERO_INITIAL_GUESS) { 847 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 848 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 849 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 850 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 851 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 852 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 853 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 854 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 855 i2 = 7; 856 idiag += 49; 857 for (i=1; i<m; i++) { 858 v = aa + 49*ai[i]; 859 vi = aj + ai[i]; 860 nz = diag[i] - ai[i]; 861 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 862 while (nz--) { 863 idx = 7*(*vi++); 864 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 865 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 866 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 867 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 868 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 869 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 870 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 871 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 872 v += 49; 873 } 874 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 875 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 876 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 877 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 878 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 879 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 880 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 881 idiag += 49; 882 i2 += 7; 883 } 884 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 885 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 886 } 887 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 888 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 889 i2 = 0; 890 mdiag = a->idiag+49*a->mbs; 891 for (i=0; i<m; i++) { 892 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 893 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 894 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 895 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 896 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 897 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 898 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 899 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 900 mdiag += 36; 901 i2 += 6; 902 } 903 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 904 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 905 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 906 } 907 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 908 idiag = a->idiag+49*a->mbs - 49; 909 i2 = 7*m - 7; 910 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 911 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 912 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 913 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 914 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 915 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 916 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 917 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 918 idiag -= 49; 919 i2 -= 7; 920 for (i=m-2; i>=0; i--) { 921 v = aa + 49*(diag[i]+1); 922 vi = aj + diag[i] + 1; 923 nz = ai[i+1] - diag[i] - 1; 924 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 925 while (nz--) { 926 idx = 7*(*vi++); 927 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 928 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 929 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 930 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 931 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 932 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 933 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 934 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 935 v += 49; 936 } 937 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 938 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 939 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 940 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 941 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 942 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 943 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 944 idiag -= 49; 945 i2 -= 7; 946 } 947 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 948 } 949 } else { 950 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 951 } 952 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 953 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 /* 958 Special version for direct calls from Fortran (Used in PETSc-fun3d) 959 */ 960 #if defined(PETSC_HAVE_FORTRAN_CAPS) 961 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 962 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 963 #define matsetvaluesblocked4_ matsetvaluesblocked4 964 #endif 965 966 EXTERN_C_BEGIN 967 #undef __FUNCT__ 968 #define __FUNCT__ "matsetvaluesblocked4_" 969 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 970 { 971 Mat A = *AA; 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 974 PetscInt *ai=a->i,*ailen=a->ilen; 975 PetscInt *aj=a->j,stepval,lastcol = -1; 976 const PetscScalar *value = v; 977 MatScalar *ap,*aa = a->a,*bap; 978 979 PetscFunctionBegin; 980 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 981 stepval = (n-1)*4; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; 984 rp = aj + ai[row]; 985 ap = aa + 16*ai[row]; 986 nrow = ailen[row]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; 991 if (col <= lastcol) low = 0; else high = nrow; 992 lastcol = col; 993 value = v + k*(stepval+4 + l)*4; 994 while (high-low > 7) { 995 t = (low+high)/2; 996 if (rp[t] > col) high = t; 997 else low = t; 998 } 999 for (i=low; i<high; i++) { 1000 if (rp[i] > col) break; 1001 if (rp[i] == col) { 1002 bap = ap + 16*i; 1003 for (ii=0; ii<4; ii++,value+=stepval) { 1004 for (jj=ii; jj<16; jj+=4) { 1005 bap[jj] += *value++; 1006 } 1007 } 1008 goto noinsert2; 1009 } 1010 } 1011 N = nrow++ - 1; 1012 high++; /* added new column index thus must search to one higher than before */ 1013 /* shift up all the later entries in this row */ 1014 for (ii=N; ii>=i; ii--) { 1015 rp[ii+1] = rp[ii]; 1016 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1017 } 1018 if (N >= i) { 1019 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1020 } 1021 rp[i] = col; 1022 bap = ap + 16*i; 1023 for (ii=0; ii<4; ii++,value+=stepval) { 1024 for (jj=ii; jj<16; jj+=4) { 1025 bap[jj] = *value++; 1026 } 1027 } 1028 noinsert2:; 1029 low = i; 1030 } 1031 ailen[row] = nrow; 1032 } 1033 PetscFunctionReturnVoid(); 1034 } 1035 EXTERN_C_END 1036 1037 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1038 #define matsetvalues4_ MATSETVALUES4 1039 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1040 #define matsetvalues4_ matsetvalues4 1041 #endif 1042 1043 EXTERN_C_BEGIN 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatSetValues4_" 1046 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1047 { 1048 Mat A = *AA; 1049 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1050 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1051 PetscInt *ai=a->i,*ailen=a->ilen; 1052 PetscInt *aj=a->j,brow,bcol; 1053 PetscInt ridx,cidx,lastcol = -1; 1054 MatScalar *ap,value,*aa=a->a,*bap; 1055 1056 PetscFunctionBegin; 1057 for (k=0; k<m; k++) { /* loop over added rows */ 1058 row = im[k]; brow = row/4; 1059 rp = aj + ai[brow]; 1060 ap = aa + 16*ai[brow]; 1061 nrow = ailen[brow]; 1062 low = 0; 1063 high = nrow; 1064 for (l=0; l<n; l++) { /* loop over added columns */ 1065 col = in[l]; bcol = col/4; 1066 ridx = row % 4; cidx = col % 4; 1067 value = v[l + k*n]; 1068 if (col <= lastcol) low = 0; else high = nrow; 1069 lastcol = col; 1070 while (high-low > 7) { 1071 t = (low+high)/2; 1072 if (rp[t] > bcol) high = t; 1073 else low = t; 1074 } 1075 for (i=low; i<high; i++) { 1076 if (rp[i] > bcol) break; 1077 if (rp[i] == bcol) { 1078 bap = ap + 16*i + 4*cidx + ridx; 1079 *bap += value; 1080 goto noinsert1; 1081 } 1082 } 1083 N = nrow++ - 1; 1084 high++; /* added new column thus must search to one higher than before */ 1085 /* shift up all the later entries in this row */ 1086 for (ii=N; ii>=i; ii--) { 1087 rp[ii+1] = rp[ii]; 1088 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1089 } 1090 if (N>=i) { 1091 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1092 } 1093 rp[i] = bcol; 1094 ap[16*i + 4*cidx + ridx] = value; 1095 noinsert1:; 1096 low = i; 1097 } 1098 ailen[brow] = nrow; 1099 } 1100 PetscFunctionReturnVoid(); 1101 } 1102 EXTERN_C_END 1103 1104 /* 1105 Checks for missing diagonals 1106 */ 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1109 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1110 { 1111 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 PetscInt *diag,*jj = a->j,i; 1114 1115 PetscFunctionBegin; 1116 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1117 *missing = PETSC_FALSE; 1118 if (A->rmap->n > 0 && !jj) { 1119 *missing = PETSC_TRUE; 1120 if (d) *d = 0; 1121 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1122 } else { 1123 diag = a->diag; 1124 for (i=0; i<a->mbs; i++) { 1125 if (jj[diag[i]] != i) { 1126 *missing = PETSC_TRUE; 1127 if (d) *d = i; 1128 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1129 } 1130 } 1131 } 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1137 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1138 { 1139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1140 PetscErrorCode ierr; 1141 PetscInt i,j,m = a->mbs; 1142 1143 PetscFunctionBegin; 1144 if (!a->diag) { 1145 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1146 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1147 a->free_diag = PETSC_TRUE; 1148 } 1149 for (i=0; i<m; i++) { 1150 a->diag[i] = a->i[i+1]; 1151 for (j=a->i[i]; j<a->i[i+1]; j++) { 1152 if (a->j[j] == i) { 1153 a->diag[i] = j; 1154 break; 1155 } 1156 } 1157 } 1158 PetscFunctionReturn(0); 1159 } 1160 1161 1162 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1166 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1167 { 1168 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1169 PetscErrorCode ierr; 1170 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1171 PetscInt *tia, *tja; 1172 1173 PetscFunctionBegin; 1174 *nn = n; 1175 if (!ia) PetscFunctionReturn(0); 1176 if (symmetric) { 1177 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1178 nz = tia[n]; 1179 } else { 1180 tia = a->i; tja = a->j; 1181 } 1182 1183 if (!blockcompressed && bs > 1) { 1184 (*nn) *= bs; 1185 nbs = bs; 1186 /* malloc & create the natural set of indices */ 1187 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1188 if (n) { 1189 (*ia)[0] = 0; 1190 for (j=1; j<bs; j++) { 1191 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1192 } 1193 } 1194 1195 for (i=1; i<n; i++) { 1196 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1197 for (j=1; j<bs; j++) { 1198 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1199 } 1200 } 1201 if (n) { 1202 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1203 } 1204 1205 if (ja) { 1206 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1207 cnt = 0; 1208 for (i=0; i<n; i++) { 1209 for (j=0; j<bs; j++) { 1210 for (k=tia[i]; k<tia[i+1]; k++) { 1211 for (l=0; l<bs; l++) { 1212 (*ja)[cnt++] = bs*tja[k] + l; 1213 } 1214 } 1215 } 1216 } 1217 } 1218 1219 n *= bs; 1220 nz *= bs*bs; 1221 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1222 ierr = PetscFree(tia);CHKERRQ(ierr); 1223 ierr = PetscFree(tja);CHKERRQ(ierr); 1224 } 1225 } else if (oshift == 1) { 1226 if (symmetric) { 1227 PetscInt nz = tia[A->rmap->n/bs]; 1228 /* add 1 to i and j indices */ 1229 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1230 *ia = tia; 1231 if (ja) { 1232 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1233 *ja = tja; 1234 } 1235 } else { 1236 PetscInt nz = a->i[A->rmap->n/bs]; 1237 /* malloc space and add 1 to i and j indices */ 1238 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1239 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1240 if (ja) { 1241 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1242 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1243 } 1244 } 1245 } else { 1246 *ia = tia; 1247 if (ja) *ja = tja; 1248 } 1249 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1255 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1256 { 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 if (!ia) PetscFunctionReturn(0); 1261 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1262 ierr = PetscFree(*ia);CHKERRQ(ierr); 1263 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1264 } 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1270 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1278 #endif 1279 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1280 if (a->row) { 1281 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1282 } 1283 if (a->col) { 1284 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1285 } 1286 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1287 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1288 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1289 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1290 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1291 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1292 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1293 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1294 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1295 1296 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1297 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1298 ierr = PetscFree(a);CHKERRQ(ierr); 1299 1300 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1314 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1315 { 1316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1317 PetscErrorCode ierr; 1318 1319 PetscFunctionBegin; 1320 switch (op) { 1321 case MAT_ROW_ORIENTED: 1322 a->roworiented = flg; 1323 break; 1324 case MAT_KEEP_NONZERO_PATTERN: 1325 a->keepnonzeropattern = flg; 1326 break; 1327 case MAT_NEW_NONZERO_LOCATIONS: 1328 a->nonew = (flg ? 0 : 1); 1329 break; 1330 case MAT_NEW_NONZERO_LOCATION_ERR: 1331 a->nonew = (flg ? -1 : 0); 1332 break; 1333 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1334 a->nonew = (flg ? -2 : 0); 1335 break; 1336 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1337 a->nounused = (flg ? -1 : 0); 1338 break; 1339 case MAT_NEW_DIAGONALS: 1340 case MAT_IGNORE_OFF_PROC_ENTRIES: 1341 case MAT_USE_HASH_TABLE: 1342 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1343 break; 1344 case MAT_SYMMETRIC: 1345 case MAT_STRUCTURALLY_SYMMETRIC: 1346 case MAT_HERMITIAN: 1347 case MAT_SYMMETRY_ETERNAL: 1348 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1349 break; 1350 default: 1351 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1358 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1359 { 1360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1361 PetscErrorCode ierr; 1362 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1363 MatScalar *aa,*aa_i; 1364 PetscScalar *v_i; 1365 1366 PetscFunctionBegin; 1367 bs = A->rmap->bs; 1368 ai = a->i; 1369 aj = a->j; 1370 aa = a->a; 1371 bs2 = a->bs2; 1372 1373 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1374 1375 bn = row/bs; /* Block number */ 1376 bp = row % bs; /* Block Position */ 1377 M = ai[bn+1] - ai[bn]; 1378 *nz = bs*M; 1379 1380 if (v) { 1381 *v = 0; 1382 if (*nz) { 1383 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1384 for (i=0; i<M; i++) { /* for each block in the block row */ 1385 v_i = *v + i*bs; 1386 aa_i = aa + bs2*(ai[bn] + i); 1387 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1388 } 1389 } 1390 } 1391 1392 if (idx) { 1393 *idx = 0; 1394 if (*nz) { 1395 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1396 for (i=0; i<M; i++) { /* for each block in the block row */ 1397 idx_i = *idx + i*bs; 1398 itmp = bs*aj[ai[bn] + i]; 1399 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1400 } 1401 } 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1408 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1409 { 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1414 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1415 PetscFunctionReturn(0); 1416 } 1417 1418 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1422 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1423 { 1424 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1425 Mat C; 1426 PetscErrorCode ierr; 1427 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1428 PetscInt *rows,*cols,bs2=a->bs2; 1429 MatScalar *array; 1430 1431 PetscFunctionBegin; 1432 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1433 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1434 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1435 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1436 1437 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1438 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1439 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1440 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1441 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1442 ierr = PetscFree(col);CHKERRQ(ierr); 1443 } else { 1444 C = *B; 1445 } 1446 1447 array = a->a; 1448 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1449 for (i=0; i<mbs; i++) { 1450 cols[0] = i*bs; 1451 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1452 len = ai[i+1] - ai[i]; 1453 for (j=0; j<len; j++) { 1454 rows[0] = (*aj++)*bs; 1455 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1456 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1457 array += bs2; 1458 } 1459 } 1460 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1461 1462 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1464 1465 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1466 *B = C; 1467 } else { 1468 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1475 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1476 { 1477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1478 PetscErrorCode ierr; 1479 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1480 int fd; 1481 PetscScalar *aa; 1482 FILE *file; 1483 1484 PetscFunctionBegin; 1485 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1486 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1487 col_lens[0] = MAT_FILE_COOKIE; 1488 1489 col_lens[1] = A->rmap->N; 1490 col_lens[2] = A->cmap->n; 1491 col_lens[3] = a->nz*bs2; 1492 1493 /* store lengths of each row and write (including header) to file */ 1494 count = 0; 1495 for (i=0; i<a->mbs; i++) { 1496 for (j=0; j<bs; j++) { 1497 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1498 } 1499 } 1500 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1501 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1502 1503 /* store column indices (zero start index) */ 1504 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1505 count = 0; 1506 for (i=0; i<a->mbs; i++) { 1507 for (j=0; j<bs; j++) { 1508 for (k=a->i[i]; k<a->i[i+1]; k++) { 1509 for (l=0; l<bs; l++) { 1510 jj[count++] = bs*a->j[k] + l; 1511 } 1512 } 1513 } 1514 } 1515 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1516 ierr = PetscFree(jj);CHKERRQ(ierr); 1517 1518 /* store nonzero values */ 1519 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1520 count = 0; 1521 for (i=0; i<a->mbs; i++) { 1522 for (j=0; j<bs; j++) { 1523 for (k=a->i[i]; k<a->i[i+1]; k++) { 1524 for (l=0; l<bs; l++) { 1525 aa[count++] = a->a[bs2*k + l*bs + j]; 1526 } 1527 } 1528 } 1529 } 1530 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1531 ierr = PetscFree(aa);CHKERRQ(ierr); 1532 1533 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1534 if (file) { 1535 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1542 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1543 { 1544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1545 PetscErrorCode ierr; 1546 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1547 PetscViewerFormat format; 1548 1549 PetscFunctionBegin; 1550 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1551 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1552 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1553 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1554 Mat aij; 1555 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1556 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1557 ierr = MatDestroy(aij);CHKERRQ(ierr); 1558 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1559 PetscFunctionReturn(0); 1560 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1561 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1562 for (i=0; i<a->mbs; i++) { 1563 for (j=0; j<bs; j++) { 1564 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1565 for (k=a->i[i]; k<a->i[i+1]; k++) { 1566 for (l=0; l<bs; l++) { 1567 #if defined(PETSC_USE_COMPLEX) 1568 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1570 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1573 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1575 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1576 } 1577 #else 1578 if (a->a[bs2*k + l*bs + j] != 0.0) { 1579 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1580 } 1581 #endif 1582 } 1583 } 1584 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1585 } 1586 } 1587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1588 } else { 1589 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1590 for (i=0; i<a->mbs; i++) { 1591 for (j=0; j<bs; j++) { 1592 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1593 for (k=a->i[i]; k<a->i[i+1]; k++) { 1594 for (l=0; l<bs; l++) { 1595 #if defined(PETSC_USE_COMPLEX) 1596 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1597 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1598 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1599 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1600 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1601 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1602 } else { 1603 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1604 } 1605 #else 1606 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1607 #endif 1608 } 1609 } 1610 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1611 } 1612 } 1613 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1614 } 1615 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #undef __FUNCT__ 1620 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1621 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1622 { 1623 Mat A = (Mat) Aa; 1624 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1625 PetscErrorCode ierr; 1626 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1627 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1628 MatScalar *aa; 1629 PetscViewer viewer; 1630 1631 PetscFunctionBegin; 1632 1633 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1634 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1635 1636 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1637 1638 /* loop over matrix elements drawing boxes */ 1639 color = PETSC_DRAW_BLUE; 1640 for (i=0,row=0; i<mbs; i++,row+=bs) { 1641 for (j=a->i[i]; j<a->i[i+1]; j++) { 1642 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1643 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1644 aa = a->a + j*bs2; 1645 for (k=0; k<bs; k++) { 1646 for (l=0; l<bs; l++) { 1647 if (PetscRealPart(*aa++) >= 0.) continue; 1648 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1649 } 1650 } 1651 } 1652 } 1653 color = PETSC_DRAW_CYAN; 1654 for (i=0,row=0; i<mbs; i++,row+=bs) { 1655 for (j=a->i[i]; j<a->i[i+1]; j++) { 1656 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1657 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1658 aa = a->a + j*bs2; 1659 for (k=0; k<bs; k++) { 1660 for (l=0; l<bs; l++) { 1661 if (PetscRealPart(*aa++) != 0.) continue; 1662 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1663 } 1664 } 1665 } 1666 } 1667 1668 color = PETSC_DRAW_RED; 1669 for (i=0,row=0; i<mbs; i++,row+=bs) { 1670 for (j=a->i[i]; j<a->i[i+1]; j++) { 1671 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1672 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1673 aa = a->a + j*bs2; 1674 for (k=0; k<bs; k++) { 1675 for (l=0; l<bs; l++) { 1676 if (PetscRealPart(*aa++) <= 0.) continue; 1677 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1678 } 1679 } 1680 } 1681 } 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1687 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1688 { 1689 PetscErrorCode ierr; 1690 PetscReal xl,yl,xr,yr,w,h; 1691 PetscDraw draw; 1692 PetscTruth isnull; 1693 1694 PetscFunctionBegin; 1695 1696 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1697 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1698 1699 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1700 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1701 xr += w; yr += h; xl = -w; yl = -h; 1702 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1703 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatView_SeqBAIJ" 1710 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1711 { 1712 PetscErrorCode ierr; 1713 PetscTruth iascii,isbinary,isdraw; 1714 1715 PetscFunctionBegin; 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1717 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1718 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1719 if (iascii){ 1720 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1721 } else if (isbinary) { 1722 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1723 } else if (isdraw) { 1724 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1725 } else { 1726 Mat B; 1727 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1728 ierr = MatView(B,viewer);CHKERRQ(ierr); 1729 ierr = MatDestroy(B);CHKERRQ(ierr); 1730 } 1731 PetscFunctionReturn(0); 1732 } 1733 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1737 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1738 { 1739 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1740 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1741 PetscInt *ai = a->i,*ailen = a->ilen; 1742 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1743 MatScalar *ap,*aa = a->a; 1744 1745 PetscFunctionBegin; 1746 for (k=0; k<m; k++) { /* loop over rows */ 1747 row = im[k]; brow = row/bs; 1748 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1749 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1750 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1751 nrow = ailen[brow]; 1752 for (l=0; l<n; l++) { /* loop over columns */ 1753 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1754 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1755 col = in[l] ; 1756 bcol = col/bs; 1757 cidx = col%bs; 1758 ridx = row%bs; 1759 high = nrow; 1760 low = 0; /* assume unsorted */ 1761 while (high-low > 5) { 1762 t = (low+high)/2; 1763 if (rp[t] > bcol) high = t; 1764 else low = t; 1765 } 1766 for (i=low; i<high; i++) { 1767 if (rp[i] > bcol) break; 1768 if (rp[i] == bcol) { 1769 *v++ = ap[bs2*i+bs*cidx+ridx]; 1770 goto finished; 1771 } 1772 } 1773 *v++ = 0.0; 1774 finished:; 1775 } 1776 } 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #define CHUNKSIZE 10 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1783 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1784 { 1785 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1786 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1787 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1788 PetscErrorCode ierr; 1789 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1790 PetscTruth roworiented=a->roworiented; 1791 const PetscScalar *value = v; 1792 MatScalar *ap,*aa = a->a,*bap; 1793 1794 PetscFunctionBegin; 1795 if (roworiented) { 1796 stepval = (n-1)*bs; 1797 } else { 1798 stepval = (m-1)*bs; 1799 } 1800 for (k=0; k<m; k++) { /* loop over added rows */ 1801 row = im[k]; 1802 if (row < 0) continue; 1803 #if defined(PETSC_USE_DEBUG) 1804 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1805 #endif 1806 rp = aj + ai[row]; 1807 ap = aa + bs2*ai[row]; 1808 rmax = imax[row]; 1809 nrow = ailen[row]; 1810 low = 0; 1811 high = nrow; 1812 for (l=0; l<n; l++) { /* loop over added columns */ 1813 if (in[l] < 0) continue; 1814 #if defined(PETSC_USE_DEBUG) 1815 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1816 #endif 1817 col = in[l]; 1818 if (roworiented) { 1819 value = v + k*(stepval+bs)*bs + l*bs; 1820 } else { 1821 value = v + l*(stepval+bs)*bs + k*bs; 1822 } 1823 if (col <= lastcol) low = 0; else high = nrow; 1824 lastcol = col; 1825 while (high-low > 7) { 1826 t = (low+high)/2; 1827 if (rp[t] > col) high = t; 1828 else low = t; 1829 } 1830 for (i=low; i<high; i++) { 1831 if (rp[i] > col) break; 1832 if (rp[i] == col) { 1833 bap = ap + bs2*i; 1834 if (roworiented) { 1835 if (is == ADD_VALUES) { 1836 for (ii=0; ii<bs; ii++,value+=stepval) { 1837 for (jj=ii; jj<bs2; jj+=bs) { 1838 bap[jj] += *value++; 1839 } 1840 } 1841 } else { 1842 for (ii=0; ii<bs; ii++,value+=stepval) { 1843 for (jj=ii; jj<bs2; jj+=bs) { 1844 bap[jj] = *value++; 1845 } 1846 } 1847 } 1848 } else { 1849 if (is == ADD_VALUES) { 1850 for (ii=0; ii<bs; ii++,value+=stepval) { 1851 for (jj=0; jj<bs; jj++) { 1852 *bap++ += *value++; 1853 } 1854 } 1855 } else { 1856 for (ii=0; ii<bs; ii++,value+=stepval) { 1857 for (jj=0; jj<bs; jj++) { 1858 *bap++ = *value++; 1859 } 1860 } 1861 } 1862 } 1863 goto noinsert2; 1864 } 1865 } 1866 if (nonew == 1) goto noinsert2; 1867 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1868 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1869 N = nrow++ - 1; high++; 1870 /* shift up all the later entries in this row */ 1871 for (ii=N; ii>=i; ii--) { 1872 rp[ii+1] = rp[ii]; 1873 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1874 } 1875 if (N >= i) { 1876 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1877 } 1878 rp[i] = col; 1879 bap = ap + bs2*i; 1880 if (roworiented) { 1881 for (ii=0; ii<bs; ii++,value+=stepval) { 1882 for (jj=ii; jj<bs2; jj+=bs) { 1883 bap[jj] = *value++; 1884 } 1885 } 1886 } else { 1887 for (ii=0; ii<bs; ii++,value+=stepval) { 1888 for (jj=0; jj<bs; jj++) { 1889 *bap++ = *value++; 1890 } 1891 } 1892 } 1893 noinsert2:; 1894 low = i; 1895 } 1896 ailen[row] = nrow; 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1903 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1904 { 1905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1906 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1907 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1908 PetscErrorCode ierr; 1909 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1910 MatScalar *aa = a->a,*ap; 1911 PetscReal ratio=0.6; 1912 1913 PetscFunctionBegin; 1914 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1915 1916 if (m) rmax = ailen[0]; 1917 for (i=1; i<mbs; i++) { 1918 /* move each row back by the amount of empty slots (fshift) before it*/ 1919 fshift += imax[i-1] - ailen[i-1]; 1920 rmax = PetscMax(rmax,ailen[i]); 1921 if (fshift) { 1922 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1923 N = ailen[i]; 1924 for (j=0; j<N; j++) { 1925 ip[j-fshift] = ip[j]; 1926 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1927 } 1928 } 1929 ai[i] = ai[i-1] + ailen[i-1]; 1930 } 1931 if (mbs) { 1932 fshift += imax[mbs-1] - ailen[mbs-1]; 1933 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1934 } 1935 /* reset ilen and imax for each row */ 1936 for (i=0; i<mbs; i++) { 1937 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1938 } 1939 a->nz = ai[mbs]; 1940 1941 /* diagonals may have moved, so kill the diagonal pointers */ 1942 a->idiagvalid = PETSC_FALSE; 1943 if (fshift && a->diag) { 1944 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1945 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1946 a->diag = 0; 1947 } 1948 if (fshift && a->nounused == -1) { 1949 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1950 } 1951 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1953 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1954 a->reallocs = 0; 1955 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1956 1957 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1958 if (a->compressedrow.use){ 1959 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1960 } 1961 1962 A->same_nonzero = PETSC_TRUE; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 /* 1967 This function returns an array of flags which indicate the locations of contiguous 1968 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1969 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1970 Assume: sizes should be long enough to hold all the values. 1971 */ 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1974 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1975 { 1976 PetscInt i,j,k,row; 1977 PetscTruth flg; 1978 1979 PetscFunctionBegin; 1980 for (i=0,j=0; i<n; j++) { 1981 row = idx[i]; 1982 if (row%bs!=0) { /* Not the begining of a block */ 1983 sizes[j] = 1; 1984 i++; 1985 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1986 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1987 i++; 1988 } else { /* Begining of the block, so check if the complete block exists */ 1989 flg = PETSC_TRUE; 1990 for (k=1; k<bs; k++) { 1991 if (row+k != idx[i+k]) { /* break in the block */ 1992 flg = PETSC_FALSE; 1993 break; 1994 } 1995 } 1996 if (flg) { /* No break in the bs */ 1997 sizes[j] = bs; 1998 i+= bs; 1999 } else { 2000 sizes[j] = 1; 2001 i++; 2002 } 2003 } 2004 } 2005 *bs_max = j; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2011 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2012 { 2013 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,k,count,*rows; 2016 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2017 PetscScalar zero = 0.0; 2018 MatScalar *aa; 2019 2020 PetscFunctionBegin; 2021 /* Make a copy of the IS and sort it */ 2022 /* allocate memory for rows,sizes */ 2023 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2024 2025 /* copy IS values to rows, and sort them */ 2026 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2027 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2028 if (baij->keepnonzeropattern) { 2029 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2030 bs_max = is_n; 2031 A->same_nonzero = PETSC_TRUE; 2032 } else { 2033 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2034 A->same_nonzero = PETSC_FALSE; 2035 } 2036 2037 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2038 row = rows[j]; 2039 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2040 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2041 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2042 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2043 if (diag != 0.0) { 2044 if (baij->ilen[row/bs] > 0) { 2045 baij->ilen[row/bs] = 1; 2046 baij->j[baij->i[row/bs]] = row/bs; 2047 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2048 } 2049 /* Now insert all the diagonal values for this bs */ 2050 for (k=0; k<bs; k++) { 2051 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2052 } 2053 } else { /* (diag == 0.0) */ 2054 baij->ilen[row/bs] = 0; 2055 } /* end (diag == 0.0) */ 2056 } else { /* (sizes[i] != bs) */ 2057 #if defined (PETSC_USE_DEBUG) 2058 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2059 #endif 2060 for (k=0; k<count; k++) { 2061 aa[0] = zero; 2062 aa += bs; 2063 } 2064 if (diag != 0.0) { 2065 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2066 } 2067 } 2068 } 2069 2070 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2071 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2077 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2078 { 2079 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2080 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2081 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2082 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2083 PetscErrorCode ierr; 2084 PetscInt ridx,cidx,bs2=a->bs2; 2085 PetscTruth roworiented=a->roworiented; 2086 MatScalar *ap,value,*aa=a->a,*bap; 2087 2088 PetscFunctionBegin; 2089 if (v) PetscValidScalarPointer(v,6); 2090 for (k=0; k<m; k++) { /* loop over added rows */ 2091 row = im[k]; 2092 brow = row/bs; 2093 if (row < 0) continue; 2094 #if defined(PETSC_USE_DEBUG) 2095 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2096 #endif 2097 rp = aj + ai[brow]; 2098 ap = aa + bs2*ai[brow]; 2099 rmax = imax[brow]; 2100 nrow = ailen[brow]; 2101 low = 0; 2102 high = nrow; 2103 for (l=0; l<n; l++) { /* loop over added columns */ 2104 if (in[l] < 0) continue; 2105 #if defined(PETSC_USE_DEBUG) 2106 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2107 #endif 2108 col = in[l]; bcol = col/bs; 2109 ridx = row % bs; cidx = col % bs; 2110 if (roworiented) { 2111 value = v[l + k*n]; 2112 } else { 2113 value = v[k + l*m]; 2114 } 2115 if (col <= lastcol) low = 0; else high = nrow; 2116 lastcol = col; 2117 while (high-low > 7) { 2118 t = (low+high)/2; 2119 if (rp[t] > bcol) high = t; 2120 else low = t; 2121 } 2122 for (i=low; i<high; i++) { 2123 if (rp[i] > bcol) break; 2124 if (rp[i] == bcol) { 2125 bap = ap + bs2*i + bs*cidx + ridx; 2126 if (is == ADD_VALUES) *bap += value; 2127 else *bap = value; 2128 goto noinsert1; 2129 } 2130 } 2131 if (nonew == 1) goto noinsert1; 2132 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2133 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2134 N = nrow++ - 1; high++; 2135 /* shift up all the later entries in this row */ 2136 for (ii=N; ii>=i; ii--) { 2137 rp[ii+1] = rp[ii]; 2138 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2139 } 2140 if (N>=i) { 2141 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2142 } 2143 rp[i] = bcol; 2144 ap[bs2*i + bs*cidx + ridx] = value; 2145 a->nz++; 2146 noinsert1:; 2147 low = i; 2148 } 2149 ailen[brow] = nrow; 2150 } 2151 A->same_nonzero = PETSC_FALSE; 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2157 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2158 { 2159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2160 Mat outA; 2161 PetscErrorCode ierr; 2162 PetscTruth row_identity,col_identity; 2163 2164 PetscFunctionBegin; 2165 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2166 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2167 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2168 if (!row_identity || !col_identity) { 2169 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2170 } 2171 2172 outA = inA; 2173 inA->factor = MAT_FACTOR_LU; 2174 2175 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2176 2177 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2178 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2179 a->row = row; 2180 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2181 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2182 a->col = col; 2183 2184 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2185 if (a->icol) { 2186 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2187 } 2188 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2189 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2190 2191 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2192 if (!a->solve_work) { 2193 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2194 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2195 } 2196 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2197 2198 PetscFunctionReturn(0); 2199 } 2200 2201 EXTERN_C_BEGIN 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2204 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2205 { 2206 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2207 PetscInt i,nz,mbs; 2208 2209 PetscFunctionBegin; 2210 nz = baij->maxnz/baij->bs2; 2211 mbs = baij->mbs; 2212 for (i=0; i<nz; i++) { 2213 baij->j[i] = indices[i]; 2214 } 2215 baij->nz = nz; 2216 for (i=0; i<mbs; i++) { 2217 baij->ilen[i] = baij->imax[i]; 2218 } 2219 PetscFunctionReturn(0); 2220 } 2221 EXTERN_C_END 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2225 /*@ 2226 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2227 in the matrix. 2228 2229 Input Parameters: 2230 + mat - the SeqBAIJ matrix 2231 - indices - the column indices 2232 2233 Level: advanced 2234 2235 Notes: 2236 This can be called if you have precomputed the nonzero structure of the 2237 matrix and want to provide it to the matrix object to improve the performance 2238 of the MatSetValues() operation. 2239 2240 You MUST have set the correct numbers of nonzeros per row in the call to 2241 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2242 2243 MUST be called before any calls to MatSetValues(); 2244 2245 @*/ 2246 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2247 { 2248 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2249 2250 PetscFunctionBegin; 2251 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2252 PetscValidPointer(indices,2); 2253 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2254 if (f) { 2255 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2256 } else { 2257 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2264 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2265 { 2266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2267 PetscErrorCode ierr; 2268 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2269 PetscReal atmp; 2270 PetscScalar *x,zero = 0.0; 2271 MatScalar *aa; 2272 PetscInt ncols,brow,krow,kcol; 2273 2274 PetscFunctionBegin; 2275 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2276 bs = A->rmap->bs; 2277 aa = a->a; 2278 ai = a->i; 2279 aj = a->j; 2280 mbs = a->mbs; 2281 2282 ierr = VecSet(v,zero);CHKERRQ(ierr); 2283 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2284 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2285 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2286 for (i=0; i<mbs; i++) { 2287 ncols = ai[1] - ai[0]; ai++; 2288 brow = bs*i; 2289 for (j=0; j<ncols; j++){ 2290 for (kcol=0; kcol<bs; kcol++){ 2291 for (krow=0; krow<bs; krow++){ 2292 atmp = PetscAbsScalar(*aa);aa++; 2293 row = brow + krow; /* row index */ 2294 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2295 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2296 } 2297 } 2298 aj++; 2299 } 2300 } 2301 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatCopy_SeqBAIJ" 2307 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2308 { 2309 PetscErrorCode ierr; 2310 2311 PetscFunctionBegin; 2312 /* If the two matrices have the same copy implementation, use fast copy. */ 2313 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2315 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2316 2317 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2318 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2319 } 2320 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2321 } else { 2322 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2323 } 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2329 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2330 { 2331 PetscErrorCode ierr; 2332 2333 PetscFunctionBegin; 2334 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2340 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2341 { 2342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2343 PetscFunctionBegin; 2344 *array = a->a; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2350 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2351 { 2352 PetscFunctionBegin; 2353 PetscFunctionReturn(0); 2354 } 2355 2356 #include "petscblaslapack.h" 2357 #undef __FUNCT__ 2358 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2359 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2360 { 2361 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2362 PetscErrorCode ierr; 2363 PetscInt i,bs=Y->rmap->bs,j,bs2; 2364 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2365 2366 PetscFunctionBegin; 2367 if (str == SAME_NONZERO_PATTERN) { 2368 PetscScalar alpha = a; 2369 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2370 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2371 if (y->xtoy && y->XtoY != X) { 2372 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2373 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2374 } 2375 if (!y->xtoy) { /* get xtoy */ 2376 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2377 y->XtoY = X; 2378 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2379 } 2380 bs2 = bs*bs; 2381 for (i=0; i<x->nz; i++) { 2382 j = 0; 2383 while (j < bs2){ 2384 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2385 j++; 2386 } 2387 } 2388 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2389 } else { 2390 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2391 } 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2397 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2398 { 2399 PetscInt rbs,cbs; 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2404 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2405 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2406 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2412 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2413 { 2414 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2415 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2416 MatScalar *aa = a->a; 2417 2418 PetscFunctionBegin; 2419 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2425 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2426 { 2427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2428 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2429 MatScalar *aa = a->a; 2430 2431 PetscFunctionBegin; 2432 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2437 2438 #undef __FUNCT__ 2439 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2440 /* 2441 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2442 */ 2443 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2444 { 2445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2446 PetscErrorCode ierr; 2447 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2448 PetscInt nz = a->i[m],row,*jj,mr,col; 2449 2450 PetscFunctionBegin; 2451 *nn = n; 2452 if (!ia) PetscFunctionReturn(0); 2453 if (symmetric) { 2454 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2455 } else { 2456 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2457 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2458 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2459 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2460 jj = a->j; 2461 for (i=0; i<nz; i++) { 2462 collengths[jj[i]]++; 2463 } 2464 cia[0] = oshift; 2465 for (i=0; i<n; i++) { 2466 cia[i+1] = cia[i] + collengths[i]; 2467 } 2468 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2469 jj = a->j; 2470 for (row=0; row<m; row++) { 2471 mr = a->i[row+1] - a->i[row]; 2472 for (i=0; i<mr; i++) { 2473 col = *jj++; 2474 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2475 } 2476 } 2477 ierr = PetscFree(collengths);CHKERRQ(ierr); 2478 *ia = cia; *ja = cja; 2479 } 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2485 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2486 { 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 if (!ia) PetscFunctionReturn(0); 2491 ierr = PetscFree(*ia);CHKERRQ(ierr); 2492 ierr = PetscFree(*ja);CHKERRQ(ierr); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2498 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2499 { 2500 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2501 PetscErrorCode ierr; 2502 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2503 PetscScalar dx,*y,*xx,*w3_array; 2504 PetscScalar *vscale_array; 2505 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2506 Vec w1=coloring->w1,w2=coloring->w2,w3; 2507 void *fctx = coloring->fctx; 2508 PetscTruth flg = PETSC_FALSE; 2509 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2510 Vec x1_tmp; 2511 2512 PetscFunctionBegin; 2513 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2514 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2515 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2516 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2517 2518 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2519 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2520 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2521 if (flg) { 2522 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2523 } else { 2524 PetscTruth assembled; 2525 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2526 if (assembled) { 2527 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2528 } 2529 } 2530 2531 x1_tmp = x1; 2532 if (!coloring->vscale){ 2533 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2534 } 2535 2536 /* 2537 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2538 coloring->F for the coarser grids from the finest 2539 */ 2540 if (coloring->F) { 2541 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2542 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2543 if (m1 != m2) { 2544 coloring->F = 0; 2545 } 2546 } 2547 2548 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2549 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2550 } 2551 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2552 2553 /* Set w1 = F(x1) */ 2554 if (coloring->F) { 2555 w1 = coloring->F; /* use already computed value of function */ 2556 coloring->F = 0; 2557 } else { 2558 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2559 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2560 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2561 } 2562 2563 if (!coloring->w3) { 2564 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2565 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2566 } 2567 w3 = coloring->w3; 2568 2569 CHKMEMQ; 2570 /* Compute all the local scale factors, including ghost points */ 2571 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2572 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2573 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2574 if (ctype == IS_COLORING_GHOSTED){ 2575 col_start = 0; col_end = N; 2576 } else if (ctype == IS_COLORING_GLOBAL){ 2577 xx = xx - start; 2578 vscale_array = vscale_array - start; 2579 col_start = start; col_end = N + start; 2580 } CHKMEMQ; 2581 for (col=col_start; col<col_end; col++){ 2582 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2583 if (coloring->htype[0] == 'w') { 2584 dx = 1.0 + unorm; 2585 } else { 2586 dx = xx[col]; 2587 } 2588 if (dx == 0.0) dx = 1.0; 2589 #if !defined(PETSC_USE_COMPLEX) 2590 if (dx < umin && dx >= 0.0) dx = umin; 2591 else if (dx < 0.0 && dx > -umin) dx = -umin; 2592 #else 2593 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2594 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2595 #endif 2596 dx *= epsilon; 2597 vscale_array[col] = 1.0/dx; 2598 } CHKMEMQ; 2599 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2600 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2601 if (ctype == IS_COLORING_GLOBAL){ 2602 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2603 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2604 } 2605 CHKMEMQ; 2606 if (coloring->vscaleforrow) { 2607 vscaleforrow = coloring->vscaleforrow; 2608 } else { 2609 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2610 } 2611 2612 2613 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2614 /* 2615 Loop over each color 2616 */ 2617 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2618 for (k=0; k<coloring->ncolors; k++) { 2619 coloring->currentcolor = k; 2620 for (i=0; i<bs; i++) { 2621 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2622 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2623 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2624 /* 2625 Loop over each column associated with color 2626 adding the perturbation to the vector w3. 2627 */ 2628 for (l=0; l<coloring->ncolumns[k]; l++) { 2629 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2630 if (coloring->htype[0] == 'w') { 2631 dx = 1.0 + unorm; 2632 } else { 2633 dx = xx[col]; 2634 } 2635 if (dx == 0.0) dx = 1.0; 2636 #if !defined(PETSC_USE_COMPLEX) 2637 if (dx < umin && dx >= 0.0) dx = umin; 2638 else if (dx < 0.0 && dx > -umin) dx = -umin; 2639 #else 2640 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2641 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2642 #endif 2643 dx *= epsilon; 2644 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2645 w3_array[col] += dx; 2646 } 2647 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2648 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2649 2650 /* 2651 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2652 w2 = F(x1 + dx) - F(x1) 2653 */ 2654 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2655 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2656 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2657 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2658 2659 /* 2660 Loop over rows of vector, putting results into Jacobian matrix 2661 */ 2662 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2663 for (l=0; l<coloring->nrows[k]; l++) { 2664 row = bs*coloring->rows[k][l]; /* local row index */ 2665 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2666 for (j=0; j<bs; j++) { 2667 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2668 srows[j] = row + start + j; 2669 } 2670 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2671 } 2672 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2673 } 2674 } /* endof for each color */ 2675 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2676 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2677 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2678 ierr = PetscFree(srows);CHKERRQ(ierr); 2679 2680 coloring->currentcolor = -1; 2681 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2682 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2683 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 /* -------------------------------------------------------------------*/ 2688 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2689 MatGetRow_SeqBAIJ, 2690 MatRestoreRow_SeqBAIJ, 2691 MatMult_SeqBAIJ_N, 2692 /* 4*/ MatMultAdd_SeqBAIJ_N, 2693 MatMultTranspose_SeqBAIJ, 2694 MatMultTransposeAdd_SeqBAIJ, 2695 0, 2696 0, 2697 0, 2698 /*10*/ 0, 2699 MatLUFactor_SeqBAIJ, 2700 0, 2701 0, 2702 MatTranspose_SeqBAIJ, 2703 /*15*/ MatGetInfo_SeqBAIJ, 2704 MatEqual_SeqBAIJ, 2705 MatGetDiagonal_SeqBAIJ, 2706 MatDiagonalScale_SeqBAIJ, 2707 MatNorm_SeqBAIJ, 2708 /*20*/ 0, 2709 MatAssemblyEnd_SeqBAIJ, 2710 MatSetOption_SeqBAIJ, 2711 MatZeroEntries_SeqBAIJ, 2712 /*24*/ MatZeroRows_SeqBAIJ, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2718 0, 2719 0, 2720 MatGetArray_SeqBAIJ, 2721 MatRestoreArray_SeqBAIJ, 2722 /*34*/ MatDuplicate_SeqBAIJ, 2723 0, 2724 0, 2725 MatILUFactor_SeqBAIJ, 2726 0, 2727 /*39*/ MatAXPY_SeqBAIJ, 2728 MatGetSubMatrices_SeqBAIJ, 2729 MatIncreaseOverlap_SeqBAIJ, 2730 MatGetValues_SeqBAIJ, 2731 MatCopy_SeqBAIJ, 2732 /*44*/ 0, 2733 MatScale_SeqBAIJ, 2734 0, 2735 0, 2736 0, 2737 /*49*/ MatSetBlockSize_SeqBAIJ, 2738 MatGetRowIJ_SeqBAIJ, 2739 MatRestoreRowIJ_SeqBAIJ, 2740 MatGetColumnIJ_SeqBAIJ, 2741 MatRestoreColumnIJ_SeqBAIJ, 2742 /*54*/ MatFDColoringCreate_SeqAIJ, 2743 0, 2744 0, 2745 0, 2746 MatSetValuesBlocked_SeqBAIJ, 2747 /*59*/ MatGetSubMatrix_SeqBAIJ, 2748 MatDestroy_SeqBAIJ, 2749 MatView_SeqBAIJ, 2750 0, 2751 0, 2752 /*64*/ 0, 2753 0, 2754 0, 2755 0, 2756 0, 2757 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2758 0, 2759 MatConvert_Basic, 2760 0, 2761 0, 2762 /*74*/ 0, 2763 MatFDColoringApply_BAIJ, 2764 0, 2765 0, 2766 0, 2767 /*79*/ 0, 2768 0, 2769 0, 2770 0, 2771 MatLoad_SeqBAIJ, 2772 /*84*/ 0, 2773 0, 2774 0, 2775 0, 2776 0, 2777 /*89*/ 0, 2778 0, 2779 0, 2780 0, 2781 0, 2782 /*94*/ 0, 2783 0, 2784 0, 2785 0, 2786 0, 2787 /*99*/0, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*104*/0, 2793 MatRealPart_SeqBAIJ, 2794 MatImaginaryPart_SeqBAIJ, 2795 0, 2796 0, 2797 /*109*/0, 2798 0, 2799 0, 2800 0, 2801 MatMissingDiagonal_SeqBAIJ, 2802 /*114*/0, 2803 0, 2804 0, 2805 0, 2806 0, 2807 /*119*/0, 2808 0, 2809 MatMultHermitianTranspose_SeqBAIJ, 2810 MatMultHermitianTransposeAdd_SeqBAIJ 2811 }; 2812 2813 EXTERN_C_BEGIN 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2816 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2817 { 2818 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2819 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2820 PetscErrorCode ierr; 2821 2822 PetscFunctionBegin; 2823 if (aij->nonew != 1) { 2824 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2825 } 2826 2827 /* allocate space for values if not already there */ 2828 if (!aij->saved_values) { 2829 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2830 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2831 } 2832 2833 /* copy values over */ 2834 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2835 PetscFunctionReturn(0); 2836 } 2837 EXTERN_C_END 2838 2839 EXTERN_C_BEGIN 2840 #undef __FUNCT__ 2841 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2842 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2843 { 2844 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2845 PetscErrorCode ierr; 2846 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2847 2848 PetscFunctionBegin; 2849 if (aij->nonew != 1) { 2850 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2851 } 2852 if (!aij->saved_values) { 2853 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2854 } 2855 2856 /* copy values over */ 2857 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2858 PetscFunctionReturn(0); 2859 } 2860 EXTERN_C_END 2861 2862 EXTERN_C_BEGIN 2863 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2864 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2865 EXTERN_C_END 2866 2867 EXTERN_C_BEGIN 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2870 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2871 { 2872 Mat_SeqBAIJ *b; 2873 PetscErrorCode ierr; 2874 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2875 PetscTruth flg,skipallocation = PETSC_FALSE; 2876 2877 PetscFunctionBegin; 2878 2879 if (nz == MAT_SKIP_ALLOCATION) { 2880 skipallocation = PETSC_TRUE; 2881 nz = 0; 2882 } 2883 2884 if (bs < 0) { 2885 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2886 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2887 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2888 bs = PetscAbs(bs); 2889 } 2890 if (nnz && newbs != bs) { 2891 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2892 } 2893 bs = newbs; 2894 2895 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2896 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2897 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2898 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2899 2900 B->preallocated = PETSC_TRUE; 2901 2902 mbs = B->rmap->n/bs; 2903 nbs = B->cmap->n/bs; 2904 bs2 = bs*bs; 2905 2906 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2907 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2908 } 2909 2910 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2911 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2912 if (nnz) { 2913 for (i=0; i<mbs; i++) { 2914 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2915 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2916 } 2917 } 2918 2919 b = (Mat_SeqBAIJ*)B->data; 2920 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2921 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2922 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2923 2924 if (!flg) { 2925 switch (bs) { 2926 case 1: 2927 B->ops->mult = MatMult_SeqBAIJ_1; 2928 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2929 B->ops->sor = MatSOR_SeqBAIJ_1; 2930 break; 2931 case 2: 2932 B->ops->mult = MatMult_SeqBAIJ_2; 2933 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2934 B->ops->sor = MatSOR_SeqBAIJ_2; 2935 break; 2936 case 3: 2937 B->ops->mult = MatMult_SeqBAIJ_3; 2938 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2939 B->ops->sor = MatSOR_SeqBAIJ_3; 2940 break; 2941 case 4: 2942 B->ops->mult = MatMult_SeqBAIJ_4; 2943 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2944 B->ops->sor = MatSOR_SeqBAIJ_4; 2945 break; 2946 case 5: 2947 B->ops->mult = MatMult_SeqBAIJ_5; 2948 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2949 B->ops->sor = MatSOR_SeqBAIJ_5; 2950 break; 2951 case 6: 2952 B->ops->mult = MatMult_SeqBAIJ_6; 2953 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2954 B->ops->sor = MatSOR_SeqBAIJ_6; 2955 break; 2956 case 7: 2957 B->ops->mult = MatMult_SeqBAIJ_7; 2958 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2959 B->ops->sor = MatSOR_SeqBAIJ_7; 2960 break; 2961 default: 2962 B->ops->mult = MatMult_SeqBAIJ_N; 2963 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2964 break; 2965 } 2966 } 2967 B->rmap->bs = bs; 2968 b->mbs = mbs; 2969 b->nbs = nbs; 2970 if (!skipallocation) { 2971 if (!b->imax) { 2972 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2973 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2974 b->free_imax_ilen = PETSC_TRUE; 2975 } 2976 /* b->ilen will count nonzeros in each block row so far. */ 2977 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2978 if (!nnz) { 2979 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2980 else if (nz <= 0) nz = 1; 2981 for (i=0; i<mbs; i++) b->imax[i] = nz; 2982 nz = nz*mbs; 2983 } else { 2984 nz = 0; 2985 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2986 } 2987 2988 /* allocate the matrix space */ 2989 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2990 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2991 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2992 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2993 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2994 b->singlemalloc = PETSC_TRUE; 2995 b->i[0] = 0; 2996 for (i=1; i<mbs+1; i++) { 2997 b->i[i] = b->i[i-1] + b->imax[i-1]; 2998 } 2999 b->free_a = PETSC_TRUE; 3000 b->free_ij = PETSC_TRUE; 3001 } else { 3002 b->free_a = PETSC_FALSE; 3003 b->free_ij = PETSC_FALSE; 3004 } 3005 3006 B->rmap->bs = bs; 3007 b->bs2 = bs2; 3008 b->mbs = mbs; 3009 b->nz = 0; 3010 b->maxnz = nz*bs2; 3011 B->info.nz_unneeded = (PetscReal)b->maxnz; 3012 PetscFunctionReturn(0); 3013 } 3014 EXTERN_C_END 3015 3016 EXTERN_C_BEGIN 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3019 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3020 { 3021 PetscInt i,m,nz,nz_max=0,*nnz; 3022 PetscScalar *values=0; 3023 PetscErrorCode ierr; 3024 3025 PetscFunctionBegin; 3026 3027 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3028 3029 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3030 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3031 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3032 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3033 m = B->rmap->n/bs; 3034 3035 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3036 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3037 for(i=0; i<m; i++) { 3038 nz = ii[i+1]- ii[i]; 3039 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3040 nz_max = PetscMax(nz_max, nz); 3041 nnz[i] = nz; 3042 } 3043 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3044 ierr = PetscFree(nnz);CHKERRQ(ierr); 3045 3046 values = (PetscScalar*)V; 3047 if (!values) { 3048 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3049 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3050 } 3051 for (i=0; i<m; i++) { 3052 PetscInt ncols = ii[i+1] - ii[i]; 3053 const PetscInt *icols = jj + ii[i]; 3054 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3055 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3056 } 3057 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3058 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3059 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 3061 PetscFunctionReturn(0); 3062 } 3063 EXTERN_C_END 3064 3065 3066 EXTERN_C_BEGIN 3067 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3068 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3069 EXTERN_C_END 3070 3071 /*MC 3072 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3073 block sparse compressed row format. 3074 3075 Options Database Keys: 3076 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3077 3078 Level: beginner 3079 3080 .seealso: MatCreateSeqBAIJ() 3081 M*/ 3082 3083 3084 EXTERN_C_BEGIN 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatCreate_SeqBAIJ" 3087 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3088 { 3089 PetscErrorCode ierr; 3090 PetscMPIInt size; 3091 Mat_SeqBAIJ *b; 3092 3093 PetscFunctionBegin; 3094 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3095 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3096 3097 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3098 B->data = (void*)b; 3099 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3100 B->mapping = 0; 3101 b->row = 0; 3102 b->col = 0; 3103 b->icol = 0; 3104 b->reallocs = 0; 3105 b->saved_values = 0; 3106 3107 b->roworiented = PETSC_TRUE; 3108 b->nonew = 0; 3109 b->diag = 0; 3110 b->solve_work = 0; 3111 b->mult_work = 0; 3112 B->spptr = 0; 3113 B->info.nz_unneeded = (PetscReal)b->maxnz; 3114 b->keepnonzeropattern = PETSC_FALSE; 3115 b->xtoy = 0; 3116 b->XtoY = 0; 3117 b->compressedrow.use = PETSC_FALSE; 3118 b->compressedrow.nrows = 0; 3119 b->compressedrow.i = PETSC_NULL; 3120 b->compressedrow.rindex = PETSC_NULL; 3121 b->compressedrow.checked = PETSC_FALSE; 3122 B->same_nonzero = PETSC_FALSE; 3123 3124 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3125 "MatGetFactorAvailable_seqbaij_petsc", 3126 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3127 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3128 "MatGetFactor_seqbaij_petsc", 3129 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3130 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3131 "MatInvertBlockDiagonal_SeqBAIJ", 3132 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3133 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3134 "MatStoreValues_SeqBAIJ", 3135 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3136 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3137 "MatRetrieveValues_SeqBAIJ", 3138 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3139 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3140 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3141 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3142 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3143 "MatConvert_SeqBAIJ_SeqAIJ", 3144 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3145 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3146 "MatConvert_SeqBAIJ_SeqSBAIJ", 3147 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3148 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3149 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3150 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3151 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3152 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3153 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3154 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3155 PetscFunctionReturn(0); 3156 } 3157 EXTERN_C_END 3158 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3161 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3162 { 3163 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3164 PetscErrorCode ierr; 3165 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3166 3167 PetscFunctionBegin; 3168 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3169 3170 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3171 c->imax = a->imax; 3172 c->ilen = a->ilen; 3173 c->free_imax_ilen = PETSC_FALSE; 3174 } else { 3175 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3176 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3177 for (i=0; i<mbs; i++) { 3178 c->imax[i] = a->imax[i]; 3179 c->ilen[i] = a->ilen[i]; 3180 } 3181 c->free_imax_ilen = PETSC_TRUE; 3182 } 3183 3184 /* allocate the matrix space */ 3185 if (mallocmatspace){ 3186 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3187 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3188 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3189 c->singlemalloc = PETSC_FALSE; 3190 c->free_ij = PETSC_FALSE; 3191 c->i = a->i; 3192 c->j = a->j; 3193 c->parent = A; 3194 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3195 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3196 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3197 } else { 3198 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3199 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3200 c->singlemalloc = PETSC_TRUE; 3201 c->free_ij = PETSC_TRUE; 3202 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3203 if (mbs > 0) { 3204 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3205 if (cpvalues == MAT_COPY_VALUES) { 3206 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3207 } else { 3208 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3209 } 3210 } 3211 } 3212 } 3213 3214 c->roworiented = a->roworiented; 3215 c->nonew = a->nonew; 3216 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3217 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3218 c->bs2 = a->bs2; 3219 c->mbs = a->mbs; 3220 c->nbs = a->nbs; 3221 3222 if (a->diag) { 3223 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3224 c->diag = a->diag; 3225 c->free_diag = PETSC_FALSE; 3226 } else { 3227 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3228 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3229 for (i=0; i<mbs; i++) { 3230 c->diag[i] = a->diag[i]; 3231 } 3232 c->free_diag = PETSC_TRUE; 3233 } 3234 } else c->diag = 0; 3235 c->nz = a->nz; 3236 c->maxnz = a->maxnz; 3237 c->solve_work = 0; 3238 c->mult_work = 0; 3239 c->free_a = PETSC_TRUE; 3240 c->free_ij = PETSC_TRUE; 3241 C->preallocated = PETSC_TRUE; 3242 C->assembled = PETSC_TRUE; 3243 3244 c->compressedrow.use = a->compressedrow.use; 3245 c->compressedrow.nrows = a->compressedrow.nrows; 3246 c->compressedrow.checked = a->compressedrow.checked; 3247 if (a->compressedrow.checked && a->compressedrow.use){ 3248 i = a->compressedrow.nrows; 3249 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3250 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3251 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3252 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3253 } else { 3254 c->compressedrow.use = PETSC_FALSE; 3255 c->compressedrow.i = PETSC_NULL; 3256 c->compressedrow.rindex = PETSC_NULL; 3257 } 3258 C->same_nonzero = A->same_nonzero; 3259 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3260 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3261 PetscFunctionReturn(0); 3262 } 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3266 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3267 { 3268 PetscErrorCode ierr; 3269 3270 PetscFunctionBegin; 3271 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3272 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3273 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3274 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3275 PetscFunctionReturn(0); 3276 } 3277 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatLoad_SeqBAIJ" 3280 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3281 { 3282 Mat_SeqBAIJ *a; 3283 Mat B; 3284 PetscErrorCode ierr; 3285 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3286 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3287 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3288 PetscInt *masked,nmask,tmp,bs2,ishift; 3289 PetscMPIInt size; 3290 int fd; 3291 PetscScalar *aa; 3292 MPI_Comm comm = ((PetscObject)viewer)->comm; 3293 3294 PetscFunctionBegin; 3295 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3296 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3297 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3298 bs2 = bs*bs; 3299 3300 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3301 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3302 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3303 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3304 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3305 M = header[1]; N = header[2]; nz = header[3]; 3306 3307 if (header[3] < 0) { 3308 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3309 } 3310 3311 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3312 3313 /* 3314 This code adds extra rows to make sure the number of rows is 3315 divisible by the blocksize 3316 */ 3317 mbs = M/bs; 3318 extra_rows = bs - M + bs*(mbs); 3319 if (extra_rows == bs) extra_rows = 0; 3320 else mbs++; 3321 if (extra_rows) { 3322 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3323 } 3324 3325 /* read in row lengths */ 3326 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3327 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3328 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3329 3330 /* read in column indices */ 3331 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3332 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3333 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3334 3335 /* loop over row lengths determining block row lengths */ 3336 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3337 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3338 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3339 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3340 rowcount = 0; 3341 nzcount = 0; 3342 for (i=0; i<mbs; i++) { 3343 nmask = 0; 3344 for (j=0; j<bs; j++) { 3345 kmax = rowlengths[rowcount]; 3346 for (k=0; k<kmax; k++) { 3347 tmp = jj[nzcount++]/bs; 3348 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3349 } 3350 rowcount++; 3351 } 3352 browlengths[i] += nmask; 3353 /* zero out the mask elements we set */ 3354 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3355 } 3356 3357 /* create our matrix */ 3358 ierr = MatCreate(comm,&B); 3359 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3360 ierr = MatSetType(B,type);CHKERRQ(ierr); 3361 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3362 a = (Mat_SeqBAIJ*)B->data; 3363 3364 /* set matrix "i" values */ 3365 a->i[0] = 0; 3366 for (i=1; i<= mbs; i++) { 3367 a->i[i] = a->i[i-1] + browlengths[i-1]; 3368 a->ilen[i-1] = browlengths[i-1]; 3369 } 3370 a->nz = 0; 3371 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3372 3373 /* read in nonzero values */ 3374 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3375 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3376 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3377 3378 /* set "a" and "j" values into matrix */ 3379 nzcount = 0; jcount = 0; 3380 for (i=0; i<mbs; i++) { 3381 nzcountb = nzcount; 3382 nmask = 0; 3383 for (j=0; j<bs; j++) { 3384 kmax = rowlengths[i*bs+j]; 3385 for (k=0; k<kmax; k++) { 3386 tmp = jj[nzcount++]/bs; 3387 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3388 } 3389 } 3390 /* sort the masked values */ 3391 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3392 3393 /* set "j" values into matrix */ 3394 maskcount = 1; 3395 for (j=0; j<nmask; j++) { 3396 a->j[jcount++] = masked[j]; 3397 mask[masked[j]] = maskcount++; 3398 } 3399 /* set "a" values into matrix */ 3400 ishift = bs2*a->i[i]; 3401 for (j=0; j<bs; j++) { 3402 kmax = rowlengths[i*bs+j]; 3403 for (k=0; k<kmax; k++) { 3404 tmp = jj[nzcountb]/bs ; 3405 block = mask[tmp] - 1; 3406 point = jj[nzcountb] - bs*tmp; 3407 idx = ishift + bs2*block + j + bs*point; 3408 a->a[idx] = (MatScalar)aa[nzcountb++]; 3409 } 3410 } 3411 /* zero out the mask elements we set */ 3412 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3413 } 3414 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3415 3416 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3417 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3418 ierr = PetscFree(aa);CHKERRQ(ierr); 3419 ierr = PetscFree(jj);CHKERRQ(ierr); 3420 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3421 3422 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3423 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3424 ierr = MatView_Private(B);CHKERRQ(ierr); 3425 3426 *A = B; 3427 PetscFunctionReturn(0); 3428 } 3429 3430 #undef __FUNCT__ 3431 #define __FUNCT__ "MatCreateSeqBAIJ" 3432 /*@C 3433 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3434 compressed row) format. For good matrix assembly performance the 3435 user should preallocate the matrix storage by setting the parameter nz 3436 (or the array nnz). By setting these parameters accurately, performance 3437 during matrix assembly can be increased by more than a factor of 50. 3438 3439 Collective on MPI_Comm 3440 3441 Input Parameters: 3442 + comm - MPI communicator, set to PETSC_COMM_SELF 3443 . bs - size of block 3444 . m - number of rows 3445 . n - number of columns 3446 . nz - number of nonzero blocks per block row (same for all rows) 3447 - nnz - array containing the number of nonzero blocks in the various block rows 3448 (possibly different for each block row) or PETSC_NULL 3449 3450 Output Parameter: 3451 . A - the matrix 3452 3453 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3454 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3455 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3456 3457 Options Database Keys: 3458 . -mat_no_unroll - uses code that does not unroll the loops in the 3459 block calculations (much slower) 3460 . -mat_block_size - size of the blocks to use 3461 3462 Level: intermediate 3463 3464 Notes: 3465 The number of rows and columns must be divisible by blocksize. 3466 3467 If the nnz parameter is given then the nz parameter is ignored 3468 3469 A nonzero block is any block that as 1 or more nonzeros in it 3470 3471 The block AIJ format is fully compatible with standard Fortran 77 3472 storage. That is, the stored row and column indices can begin at 3473 either one (as in Fortran) or zero. See the users' manual for details. 3474 3475 Specify the preallocated storage with either nz or nnz (not both). 3476 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3477 allocation. For additional details, see the users manual chapter on 3478 matrices. 3479 3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3481 @*/ 3482 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3483 { 3484 PetscErrorCode ierr; 3485 3486 PetscFunctionBegin; 3487 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3488 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3489 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3490 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3496 /*@C 3497 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3498 per row in the matrix. For good matrix assembly performance the 3499 user should preallocate the matrix storage by setting the parameter nz 3500 (or the array nnz). By setting these parameters accurately, performance 3501 during matrix assembly can be increased by more than a factor of 50. 3502 3503 Collective on MPI_Comm 3504 3505 Input Parameters: 3506 + A - the matrix 3507 . bs - size of block 3508 . nz - number of block nonzeros per block row (same for all rows) 3509 - nnz - array containing the number of block nonzeros in the various block rows 3510 (possibly different for each block row) or PETSC_NULL 3511 3512 Options Database Keys: 3513 . -mat_no_unroll - uses code that does not unroll the loops in the 3514 block calculations (much slower) 3515 . -mat_block_size - size of the blocks to use 3516 3517 Level: intermediate 3518 3519 Notes: 3520 If the nnz parameter is given then the nz parameter is ignored 3521 3522 You can call MatGetInfo() to get information on how effective the preallocation was; 3523 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3524 You can also run with the option -info and look for messages with the string 3525 malloc in them to see if additional memory allocation was needed. 3526 3527 The block AIJ format is fully compatible with standard Fortran 77 3528 storage. That is, the stored row and column indices can begin at 3529 either one (as in Fortran) or zero. See the users' manual for details. 3530 3531 Specify the preallocated storage with either nz or nnz (not both). 3532 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3533 allocation. For additional details, see the users manual chapter on 3534 matrices. 3535 3536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3537 @*/ 3538 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3539 { 3540 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3541 3542 PetscFunctionBegin; 3543 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3544 if (f) { 3545 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3546 } 3547 PetscFunctionReturn(0); 3548 } 3549 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3552 /*@C 3553 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3554 (the default sequential PETSc format). 3555 3556 Collective on MPI_Comm 3557 3558 Input Parameters: 3559 + A - the matrix 3560 . i - the indices into j for the start of each local row (starts with zero) 3561 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3562 - v - optional values in the matrix 3563 3564 Level: developer 3565 3566 .keywords: matrix, aij, compressed row, sparse 3567 3568 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3569 @*/ 3570 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3571 { 3572 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3573 3574 PetscFunctionBegin; 3575 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3576 if (f) { 3577 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3578 } 3579 PetscFunctionReturn(0); 3580 } 3581 3582 3583 #undef __FUNCT__ 3584 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3585 /*@ 3586 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3587 (upper triangular entries in CSR format) provided by the user. 3588 3589 Collective on MPI_Comm 3590 3591 Input Parameters: 3592 + comm - must be an MPI communicator of size 1 3593 . bs - size of block 3594 . m - number of rows 3595 . n - number of columns 3596 . i - row indices 3597 . j - column indices 3598 - a - matrix values 3599 3600 Output Parameter: 3601 . mat - the matrix 3602 3603 Level: intermediate 3604 3605 Notes: 3606 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3607 once the matrix is destroyed 3608 3609 You cannot set new nonzero locations into this matrix, that will generate an error. 3610 3611 The i and j indices are 0 based 3612 3613 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3614 3615 @*/ 3616 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3617 { 3618 PetscErrorCode ierr; 3619 PetscInt ii; 3620 Mat_SeqBAIJ *baij; 3621 3622 PetscFunctionBegin; 3623 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3624 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3625 3626 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3627 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3628 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3629 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3630 baij = (Mat_SeqBAIJ*)(*mat)->data; 3631 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3632 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3633 3634 baij->i = i; 3635 baij->j = j; 3636 baij->a = a; 3637 baij->singlemalloc = PETSC_FALSE; 3638 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3639 baij->free_a = PETSC_FALSE; 3640 baij->free_ij = PETSC_FALSE; 3641 3642 for (ii=0; ii<m; ii++) { 3643 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3644 #if defined(PETSC_USE_DEBUG) 3645 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3646 #endif 3647 } 3648 #if defined(PETSC_USE_DEBUG) 3649 for (ii=0; ii<baij->i[m]; ii++) { 3650 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3651 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3652 } 3653 #endif 3654 3655 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 PetscFunctionReturn(0); 3658 } 3659