1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 1: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 1*diag_offset[i]; 68 diag[0] = odiag[0]; 69 mdiag[0] = odiag[0]; 70 diag[0] = 1.0 / (diag[0] + shift); 71 diag += 1; 72 mdiag += 1; 73 } 74 break; 75 case 2: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 4*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 81 diag += 4; 82 mdiag += 4; 83 } 84 break; 85 case 3: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 9*diag_offset[i]; 88 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 89 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 90 diag[8] = odiag[8]; 91 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 92 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 93 mdiag[8] = odiag[8]; 94 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 95 diag += 9; 96 mdiag += 9; 97 } 98 break; 99 case 4: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 16*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 105 diag += 16; 106 mdiag += 16; 107 } 108 break; 109 case 5: 110 for (i=0; i<mbs; i++) { 111 odiag = v + 25*diag_offset[i]; 112 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 115 diag += 25; 116 mdiag += 25; 117 } 118 break; 119 case 6: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 36*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 125 diag += 36; 126 mdiag += 36; 127 } 128 break; 129 default: 130 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 EXTERN_C_END 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 139 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 140 { 141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 142 PetscScalar *x,x1,s1; 143 const PetscScalar *b; 144 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 145 PetscErrorCode ierr; 146 PetscInt m = a->mbs,i,i2,nz,idx; 147 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 148 149 PetscFunctionBegin; 150 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 151 its = its*lits; 152 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 153 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 154 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 155 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 156 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 157 158 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 159 160 diag = a->diag; 161 idiag = a->idiag; 162 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 163 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 164 165 if (flag & SOR_ZERO_INITIAL_GUESS) { 166 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 167 x[0] = b[0]*idiag[0]; 168 i2 = 1; 169 idiag += 1; 170 for (i=1; i<m; i++) { 171 v = aa + ai[i]; 172 vi = aj + ai[i]; 173 nz = diag[i] - ai[i]; 174 s1 = b[i2]; 175 while (nz--) { 176 idx = (*vi++); 177 x1 = x[idx]; 178 s1 -= v[0]*x1; 179 v += 1; 180 } 181 x[i2] = idiag[0]*s1; 182 idiag += 1; 183 i2 += 1; 184 } 185 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 186 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 187 } 188 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 189 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 190 i2 = 0; 191 mdiag = a->idiag+a->mbs; 192 for (i=0; i<m; i++) { 193 x1 = x[i2]; 194 x[i2] = mdiag[0]*x1; 195 mdiag += 1; 196 i2 += 1; 197 } 198 ierr = PetscLogFlops(m);CHKERRQ(ierr); 199 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 200 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 201 } 202 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 203 idiag = a->idiag+a->mbs - 1; 204 i2 = m - 1; 205 x1 = x[i2]; 206 x[i2] = idiag[0]*x1; 207 idiag -= 1; 208 i2 -= 1; 209 for (i=m-2; i>=0; i--) { 210 v = aa + (diag[i]+1); 211 vi = aj + diag[i] + 1; 212 nz = ai[i+1] - diag[i] - 1; 213 s1 = x[i2]; 214 while (nz--) { 215 idx = (*vi++); 216 x1 = x[idx]; 217 s1 -= v[0]*x1; 218 v += 1; 219 } 220 x[i2] = idiag[0]*s1; 221 idiag -= 1; 222 i2 -= 1; 223 } 224 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 225 } 226 } else { 227 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 228 } 229 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 230 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 236 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 237 { 238 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 239 PetscScalar *x,x1,x2,s1,s2; 240 const PetscScalar *b; 241 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 242 PetscErrorCode ierr; 243 PetscInt m = a->mbs,i,i2,nz,idx; 244 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 245 246 PetscFunctionBegin; 247 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 248 its = its*lits; 249 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 250 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 251 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 252 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 253 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 254 255 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 256 257 diag = a->diag; 258 idiag = a->idiag; 259 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 260 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 261 262 if (flag & SOR_ZERO_INITIAL_GUESS) { 263 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 264 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 265 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 266 i2 = 2; 267 idiag += 4; 268 for (i=1; i<m; i++) { 269 v = aa + 4*ai[i]; 270 vi = aj + ai[i]; 271 nz = diag[i] - ai[i]; 272 s1 = b[i2]; s2 = b[i2+1]; 273 while (nz--) { 274 idx = 2*(*vi++); 275 x1 = x[idx]; x2 = x[1+idx]; 276 s1 -= v[0]*x1 + v[2]*x2; 277 s2 -= v[1]*x1 + v[3]*x2; 278 v += 4; 279 } 280 x[i2] = idiag[0]*s1 + idiag[2]*s2; 281 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 282 idiag += 4; 283 i2 += 2; 284 } 285 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 286 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 287 } 288 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 289 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 290 i2 = 0; 291 mdiag = a->idiag+4*a->mbs; 292 for (i=0; i<m; i++) { 293 x1 = x[i2]; x2 = x[i2+1]; 294 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 295 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 296 mdiag += 4; 297 i2 += 2; 298 } 299 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 300 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 301 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 302 } 303 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 304 idiag = a->idiag+4*a->mbs - 4; 305 i2 = 2*m - 2; 306 x1 = x[i2]; x2 = x[i2+1]; 307 x[i2] = idiag[0]*x1 + idiag[2]*x2; 308 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 309 idiag -= 4; 310 i2 -= 2; 311 for (i=m-2; i>=0; i--) { 312 v = aa + 4*(diag[i]+1); 313 vi = aj + diag[i] + 1; 314 nz = ai[i+1] - diag[i] - 1; 315 s1 = x[i2]; s2 = x[i2+1]; 316 while (nz--) { 317 idx = 2*(*vi++); 318 x1 = x[idx]; x2 = x[1+idx]; 319 s1 -= v[0]*x1 + v[2]*x2; 320 s2 -= v[1]*x1 + v[3]*x2; 321 v += 4; 322 } 323 x[i2] = idiag[0]*s1 + idiag[2]*s2; 324 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 325 idiag -= 4; 326 i2 -= 2; 327 } 328 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 329 } 330 } else { 331 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 332 } 333 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 334 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 #undef __FUNCT__ 339 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 340 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 341 { 342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 343 PetscScalar *x,x1,x2,x3,s1,s2,s3; 344 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 345 const PetscScalar *b; 346 PetscErrorCode ierr; 347 PetscInt m = a->mbs,i,i2,nz,idx; 348 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 349 350 PetscFunctionBegin; 351 its = its*lits; 352 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 353 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 354 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 355 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 356 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 357 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 358 359 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 360 361 diag = a->diag; 362 idiag = a->idiag; 363 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 364 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 365 366 if (flag & SOR_ZERO_INITIAL_GUESS) { 367 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 368 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 369 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 370 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 371 i2 = 3; 372 idiag += 9; 373 for (i=1; i<m; i++) { 374 v = aa + 9*ai[i]; 375 vi = aj + ai[i]; 376 nz = diag[i] - ai[i]; 377 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 378 while (nz--) { 379 idx = 3*(*vi++); 380 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 381 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 382 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 383 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 384 v += 9; 385 } 386 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 387 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 388 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 389 idiag += 9; 390 i2 += 3; 391 } 392 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 393 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 394 } 395 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 396 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 397 i2 = 0; 398 mdiag = a->idiag+9*a->mbs; 399 for (i=0; i<m; i++) { 400 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 401 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 402 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 403 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 404 mdiag += 9; 405 i2 += 3; 406 } 407 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 408 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 409 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 410 } 411 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 412 idiag = a->idiag+9*a->mbs - 9; 413 i2 = 3*m - 3; 414 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 415 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 416 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 417 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 418 idiag -= 9; 419 i2 -= 3; 420 for (i=m-2; i>=0; i--) { 421 v = aa + 9*(diag[i]+1); 422 vi = aj + diag[i] + 1; 423 nz = ai[i+1] - diag[i] - 1; 424 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 425 while (nz--) { 426 idx = 3*(*vi++); 427 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 428 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 429 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 430 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 431 v += 9; 432 } 433 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 434 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 435 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 436 idiag -= 9; 437 i2 -= 3; 438 } 439 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 440 } 441 } else { 442 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 443 } 444 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 445 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 451 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 452 { 453 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 454 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 455 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 456 const PetscScalar *b; 457 PetscErrorCode ierr; 458 PetscInt m = a->mbs,i,i2,nz,idx; 459 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 460 461 PetscFunctionBegin; 462 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 463 its = its*lits; 464 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 465 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 466 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 467 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 468 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 469 470 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 471 472 diag = a->diag; 473 idiag = a->idiag; 474 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 475 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 476 477 if (flag & SOR_ZERO_INITIAL_GUESS) { 478 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 479 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 480 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 481 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 482 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 483 i2 = 4; 484 idiag += 16; 485 for (i=1; i<m; i++) { 486 v = aa + 16*ai[i]; 487 vi = aj + ai[i]; 488 nz = diag[i] - ai[i]; 489 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 490 while (nz--) { 491 idx = 4*(*vi++); 492 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 493 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 494 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 495 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 496 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 497 v += 16; 498 } 499 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 500 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 501 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 502 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 503 idiag += 16; 504 i2 += 4; 505 } 506 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 507 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 508 } 509 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 510 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 511 i2 = 0; 512 mdiag = a->idiag+16*a->mbs; 513 for (i=0; i<m; i++) { 514 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 515 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 516 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 517 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 518 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 519 mdiag += 16; 520 i2 += 4; 521 } 522 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 523 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 524 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 525 } 526 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 527 idiag = a->idiag+16*a->mbs - 16; 528 i2 = 4*m - 4; 529 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 530 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 531 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 532 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 533 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 534 idiag -= 16; 535 i2 -= 4; 536 for (i=m-2; i>=0; i--) { 537 v = aa + 16*(diag[i]+1); 538 vi = aj + diag[i] + 1; 539 nz = ai[i+1] - diag[i] - 1; 540 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 541 while (nz--) { 542 idx = 4*(*vi++); 543 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 544 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 545 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 546 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 547 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 548 v += 16; 549 } 550 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 551 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 552 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 553 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 554 idiag -= 16; 555 i2 -= 4; 556 } 557 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 569 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 570 { 571 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 572 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 573 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 574 const PetscScalar *b; 575 PetscErrorCode ierr; 576 PetscInt m = a->mbs,i,i2,nz,idx; 577 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 578 579 PetscFunctionBegin; 580 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 581 its = its*lits; 582 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 583 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 584 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 585 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 586 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 587 588 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 589 590 diag = a->diag; 591 idiag = a->idiag; 592 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 593 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 594 595 if (flag & SOR_ZERO_INITIAL_GUESS) { 596 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 597 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 598 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 599 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 600 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 601 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 602 i2 = 5; 603 idiag += 25; 604 for (i=1; i<m; i++) { 605 v = aa + 25*ai[i]; 606 vi = aj + ai[i]; 607 nz = diag[i] - ai[i]; 608 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 609 while (nz--) { 610 idx = 5*(*vi++); 611 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 612 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 613 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 614 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 615 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 616 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 617 v += 25; 618 } 619 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 620 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 621 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 622 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 623 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 624 idiag += 25; 625 i2 += 5; 626 } 627 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 628 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 629 } 630 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 631 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 632 i2 = 0; 633 mdiag = a->idiag+25*a->mbs; 634 for (i=0; i<m; i++) { 635 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 636 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 637 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 638 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 639 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 640 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 641 mdiag += 25; 642 i2 += 5; 643 } 644 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 645 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 646 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 647 } 648 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 649 idiag = a->idiag+25*a->mbs - 25; 650 i2 = 5*m - 5; 651 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 652 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 653 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 654 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 655 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 656 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 657 idiag -= 25; 658 i2 -= 5; 659 for (i=m-2; i>=0; i--) { 660 v = aa + 25*(diag[i]+1); 661 vi = aj + diag[i] + 1; 662 nz = ai[i+1] - diag[i] - 1; 663 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 664 while (nz--) { 665 idx = 5*(*vi++); 666 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 667 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 668 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 669 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 670 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 671 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 672 v += 25; 673 } 674 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 675 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 676 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 677 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 678 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 679 idiag -= 25; 680 i2 -= 5; 681 } 682 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 683 } 684 } else { 685 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 686 } 687 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 688 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 #undef __FUNCT__ 693 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 694 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 695 { 696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 697 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 698 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 699 const PetscScalar *b; 700 PetscErrorCode ierr; 701 PetscInt m = a->mbs,i,i2,nz,idx; 702 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 703 704 PetscFunctionBegin; 705 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 706 its = its*lits; 707 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 708 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 709 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 710 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 711 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 712 713 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 714 715 diag = a->diag; 716 idiag = a->idiag; 717 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 718 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 719 720 if (flag & SOR_ZERO_INITIAL_GUESS) { 721 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 722 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 723 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 724 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 725 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 726 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 727 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 728 i2 = 6; 729 idiag += 36; 730 for (i=1; i<m; i++) { 731 v = aa + 36*ai[i]; 732 vi = aj + ai[i]; 733 nz = diag[i] - ai[i]; 734 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 735 while (nz--) { 736 idx = 6*(*vi++); 737 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 738 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 739 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 740 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 741 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 742 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 743 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 744 v += 36; 745 } 746 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 747 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 748 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 749 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 750 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 751 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 752 idiag += 36; 753 i2 += 6; 754 } 755 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 756 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 757 } 758 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 759 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 760 i2 = 0; 761 mdiag = a->idiag+36*a->mbs; 762 for (i=0; i<m; i++) { 763 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 764 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 765 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 766 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 767 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 768 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 769 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 770 mdiag += 36; 771 i2 += 6; 772 } 773 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 774 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 775 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 776 } 777 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 778 idiag = a->idiag+36*a->mbs - 36; 779 i2 = 6*m - 6; 780 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 781 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 782 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 783 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 784 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 785 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 786 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 787 idiag -= 36; 788 i2 -= 6; 789 for (i=m-2; i>=0; i--) { 790 v = aa + 36*(diag[i]+1); 791 vi = aj + diag[i] + 1; 792 nz = ai[i+1] - diag[i] - 1; 793 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 794 while (nz--) { 795 idx = 6*(*vi++); 796 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 797 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 798 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 799 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 800 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 801 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 802 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 803 v += 36; 804 } 805 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 806 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 807 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 808 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 809 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 810 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 811 idiag -= 36; 812 i2 -= 6; 813 } 814 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 815 } 816 } else { 817 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 818 } 819 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 820 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 826 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 829 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 830 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 831 const PetscScalar *b; 832 PetscErrorCode ierr; 833 PetscInt m = a->mbs,i,i2,nz,idx; 834 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 835 836 PetscFunctionBegin; 837 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 838 its = its*lits; 839 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 840 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 841 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 842 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 843 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 844 845 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 846 847 diag = a->diag; 848 idiag = a->idiag; 849 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 850 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 851 852 if (flag & SOR_ZERO_INITIAL_GUESS) { 853 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 854 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 855 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 856 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 857 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 858 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 859 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 860 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 861 i2 = 7; 862 idiag += 49; 863 for (i=1; i<m; i++) { 864 v = aa + 49*ai[i]; 865 vi = aj + ai[i]; 866 nz = diag[i] - ai[i]; 867 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 868 while (nz--) { 869 idx = 7*(*vi++); 870 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 871 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 872 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 873 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 874 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 875 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 876 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 877 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 878 v += 49; 879 } 880 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 881 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 882 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 883 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 884 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 885 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 886 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 887 idiag += 49; 888 i2 += 7; 889 } 890 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 891 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 892 } 893 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 894 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 895 i2 = 0; 896 mdiag = a->idiag+49*a->mbs; 897 for (i=0; i<m; i++) { 898 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 899 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 900 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 901 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 902 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 903 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 904 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 905 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 906 mdiag += 36; 907 i2 += 6; 908 } 909 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 910 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 911 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 912 } 913 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 914 idiag = a->idiag+49*a->mbs - 49; 915 i2 = 7*m - 7; 916 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 917 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 918 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 919 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 920 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 921 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 922 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 923 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 924 idiag -= 49; 925 i2 -= 7; 926 for (i=m-2; i>=0; i--) { 927 v = aa + 49*(diag[i]+1); 928 vi = aj + diag[i] + 1; 929 nz = ai[i+1] - diag[i] - 1; 930 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 931 while (nz--) { 932 idx = 7*(*vi++); 933 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 934 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 935 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 936 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 937 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 938 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 939 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 940 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 941 v += 49; 942 } 943 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 944 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 945 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 946 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 947 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 948 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 949 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 950 idiag -= 49; 951 i2 -= 7; 952 } 953 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 954 } 955 } else { 956 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 957 } 958 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 959 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 /* 964 Special version for direct calls from Fortran (Used in PETSc-fun3d) 965 */ 966 #if defined(PETSC_HAVE_FORTRAN_CAPS) 967 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 969 #define matsetvaluesblocked4_ matsetvaluesblocked4 970 #endif 971 972 EXTERN_C_BEGIN 973 #undef __FUNCT__ 974 #define __FUNCT__ "matsetvaluesblocked4_" 975 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 976 { 977 Mat A = *AA; 978 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 979 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 980 PetscInt *ai=a->i,*ailen=a->ilen; 981 PetscInt *aj=a->j,stepval,lastcol = -1; 982 const PetscScalar *value = v; 983 MatScalar *ap,*aa = a->a,*bap; 984 985 PetscFunctionBegin; 986 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 987 stepval = (n-1)*4; 988 for (k=0; k<m; k++) { /* loop over added rows */ 989 row = im[k]; 990 rp = aj + ai[row]; 991 ap = aa + 16*ai[row]; 992 nrow = ailen[row]; 993 low = 0; 994 high = nrow; 995 for (l=0; l<n; l++) { /* loop over added columns */ 996 col = in[l]; 997 if (col <= lastcol) low = 0; else high = nrow; 998 lastcol = col; 999 value = v + k*(stepval+4 + l)*4; 1000 while (high-low > 7) { 1001 t = (low+high)/2; 1002 if (rp[t] > col) high = t; 1003 else low = t; 1004 } 1005 for (i=low; i<high; i++) { 1006 if (rp[i] > col) break; 1007 if (rp[i] == col) { 1008 bap = ap + 16*i; 1009 for (ii=0; ii<4; ii++,value+=stepval) { 1010 for (jj=ii; jj<16; jj+=4) { 1011 bap[jj] += *value++; 1012 } 1013 } 1014 goto noinsert2; 1015 } 1016 } 1017 N = nrow++ - 1; 1018 high++; /* added new column index thus must search to one higher than before */ 1019 /* shift up all the later entries in this row */ 1020 for (ii=N; ii>=i; ii--) { 1021 rp[ii+1] = rp[ii]; 1022 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1023 } 1024 if (N >= i) { 1025 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1026 } 1027 rp[i] = col; 1028 bap = ap + 16*i; 1029 for (ii=0; ii<4; ii++,value+=stepval) { 1030 for (jj=ii; jj<16; jj+=4) { 1031 bap[jj] = *value++; 1032 } 1033 } 1034 noinsert2:; 1035 low = i; 1036 } 1037 ailen[row] = nrow; 1038 } 1039 PetscFunctionReturnVoid(); 1040 } 1041 EXTERN_C_END 1042 1043 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1044 #define matsetvalues4_ MATSETVALUES4 1045 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1046 #define matsetvalues4_ matsetvalues4 1047 #endif 1048 1049 EXTERN_C_BEGIN 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatSetValues4_" 1052 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1053 { 1054 Mat A = *AA; 1055 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1056 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1057 PetscInt *ai=a->i,*ailen=a->ilen; 1058 PetscInt *aj=a->j,brow,bcol; 1059 PetscInt ridx,cidx,lastcol = -1; 1060 MatScalar *ap,value,*aa=a->a,*bap; 1061 1062 PetscFunctionBegin; 1063 for (k=0; k<m; k++) { /* loop over added rows */ 1064 row = im[k]; brow = row/4; 1065 rp = aj + ai[brow]; 1066 ap = aa + 16*ai[brow]; 1067 nrow = ailen[brow]; 1068 low = 0; 1069 high = nrow; 1070 for (l=0; l<n; l++) { /* loop over added columns */ 1071 col = in[l]; bcol = col/4; 1072 ridx = row % 4; cidx = col % 4; 1073 value = v[l + k*n]; 1074 if (col <= lastcol) low = 0; else high = nrow; 1075 lastcol = col; 1076 while (high-low > 7) { 1077 t = (low+high)/2; 1078 if (rp[t] > bcol) high = t; 1079 else low = t; 1080 } 1081 for (i=low; i<high; i++) { 1082 if (rp[i] > bcol) break; 1083 if (rp[i] == bcol) { 1084 bap = ap + 16*i + 4*cidx + ridx; 1085 *bap += value; 1086 goto noinsert1; 1087 } 1088 } 1089 N = nrow++ - 1; 1090 high++; /* added new column thus must search to one higher than before */ 1091 /* shift up all the later entries in this row */ 1092 for (ii=N; ii>=i; ii--) { 1093 rp[ii+1] = rp[ii]; 1094 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1095 } 1096 if (N>=i) { 1097 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1098 } 1099 rp[i] = bcol; 1100 ap[16*i + 4*cidx + ridx] = value; 1101 noinsert1:; 1102 low = i; 1103 } 1104 ailen[brow] = nrow; 1105 } 1106 PetscFunctionReturnVoid(); 1107 } 1108 EXTERN_C_END 1109 1110 /* 1111 Checks for missing diagonals 1112 */ 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1115 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1116 { 1117 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1118 PetscErrorCode ierr; 1119 PetscInt *diag,*jj = a->j,i; 1120 1121 PetscFunctionBegin; 1122 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1123 *missing = PETSC_FALSE; 1124 if (A->rmap->n > 0 && !jj) { 1125 *missing = PETSC_TRUE; 1126 if (d) *d = 0; 1127 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1128 } else { 1129 diag = a->diag; 1130 for (i=0; i<a->mbs; i++) { 1131 if (jj[diag[i]] != i) { 1132 *missing = PETSC_TRUE; 1133 if (d) *d = i; 1134 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1135 } 1136 } 1137 } 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1143 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 PetscInt i,j,m = a->mbs; 1148 1149 PetscFunctionBegin; 1150 if (!a->diag) { 1151 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1152 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1153 a->free_diag = PETSC_TRUE; 1154 } 1155 for (i=0; i<m; i++) { 1156 a->diag[i] = a->i[i+1]; 1157 for (j=a->i[i]; j<a->i[i+1]; j++) { 1158 if (a->j[j] == i) { 1159 a->diag[i] = j; 1160 break; 1161 } 1162 } 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 1168 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1172 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1173 { 1174 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1175 PetscErrorCode ierr; 1176 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1177 PetscInt *tia, *tja; 1178 1179 PetscFunctionBegin; 1180 *nn = n; 1181 if (!ia) PetscFunctionReturn(0); 1182 if (symmetric) { 1183 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1184 } else { 1185 tia = a->i; tja = a->j; 1186 } 1187 1188 if (!blockcompressed && bs > 1) { 1189 (*nn) *= bs; 1190 nbs = bs; 1191 /* malloc & create the natural set of indices */ 1192 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1193 if (n) { 1194 (*ia)[0] = 0; 1195 for (j=1; j<bs; j++) { 1196 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1197 } 1198 } 1199 1200 for (i=1; i<n; i++) { 1201 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1202 for (j=1; j<bs; j++) { 1203 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1204 } 1205 } 1206 if (n) { 1207 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1208 } 1209 1210 if (ja) { 1211 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1212 cnt = 0; 1213 for (i=0; i<n; i++) { 1214 for (j=0; j<bs; j++) { 1215 for (k=tia[i]; k<tia[i+1]; k++) { 1216 for (l=0; l<bs; l++) { 1217 (*ja)[cnt++] = bs*tja[k] + l; 1218 } 1219 } 1220 } 1221 } 1222 } 1223 1224 n *= bs; 1225 nz *= bs*bs; 1226 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1227 ierr = PetscFree(tia);CHKERRQ(ierr); 1228 ierr = PetscFree(tja);CHKERRQ(ierr); 1229 } 1230 } else if (oshift == 1) { 1231 if (symmetric) { 1232 PetscInt nz = tia[A->rmap->n/bs]; 1233 /* add 1 to i and j indices */ 1234 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1235 *ia = tia; 1236 if (ja) { 1237 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1238 *ja = tja; 1239 } 1240 } else { 1241 PetscInt nz = a->i[A->rmap->n/bs]; 1242 /* malloc space and add 1 to i and j indices */ 1243 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1244 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1245 if (ja) { 1246 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1247 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1248 } 1249 } 1250 } else { 1251 *ia = tia; 1252 if (ja) *ja = tja; 1253 } 1254 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1260 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1261 { 1262 PetscErrorCode ierr; 1263 1264 PetscFunctionBegin; 1265 if (!ia) PetscFunctionReturn(0); 1266 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1267 ierr = PetscFree(*ia);CHKERRQ(ierr); 1268 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1269 } 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1275 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1276 { 1277 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1278 PetscErrorCode ierr; 1279 1280 PetscFunctionBegin; 1281 #if defined(PETSC_USE_LOG) 1282 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1283 #endif 1284 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1285 if (a->row) { 1286 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1287 } 1288 if (a->col) { 1289 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1290 } 1291 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1292 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1293 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1294 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1295 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1296 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1297 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1298 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1299 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1300 1301 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1302 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1303 ierr = PetscFree(a);CHKERRQ(ierr); 1304 1305 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1319 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1320 { 1321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1322 PetscErrorCode ierr; 1323 1324 PetscFunctionBegin; 1325 switch (op) { 1326 case MAT_ROW_ORIENTED: 1327 a->roworiented = flg; 1328 break; 1329 case MAT_KEEP_NONZERO_PATTERN: 1330 a->keepnonzeropattern = flg; 1331 break; 1332 case MAT_NEW_NONZERO_LOCATIONS: 1333 a->nonew = (flg ? 0 : 1); 1334 break; 1335 case MAT_NEW_NONZERO_LOCATION_ERR: 1336 a->nonew = (flg ? -1 : 0); 1337 break; 1338 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1339 a->nonew = (flg ? -2 : 0); 1340 break; 1341 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1342 a->nounused = (flg ? -1 : 0); 1343 break; 1344 case MAT_NEW_DIAGONALS: 1345 case MAT_IGNORE_OFF_PROC_ENTRIES: 1346 case MAT_USE_HASH_TABLE: 1347 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1348 break; 1349 case MAT_SYMMETRIC: 1350 case MAT_STRUCTURALLY_SYMMETRIC: 1351 case MAT_HERMITIAN: 1352 case MAT_SYMMETRY_ETERNAL: 1353 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1354 break; 1355 default: 1356 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1357 } 1358 PetscFunctionReturn(0); 1359 } 1360 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1363 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1364 { 1365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1366 PetscErrorCode ierr; 1367 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1368 MatScalar *aa,*aa_i; 1369 PetscScalar *v_i; 1370 1371 PetscFunctionBegin; 1372 bs = A->rmap->bs; 1373 ai = a->i; 1374 aj = a->j; 1375 aa = a->a; 1376 bs2 = a->bs2; 1377 1378 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1379 1380 bn = row/bs; /* Block number */ 1381 bp = row % bs; /* Block Position */ 1382 M = ai[bn+1] - ai[bn]; 1383 *nz = bs*M; 1384 1385 if (v) { 1386 *v = 0; 1387 if (*nz) { 1388 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1389 for (i=0; i<M; i++) { /* for each block in the block row */ 1390 v_i = *v + i*bs; 1391 aa_i = aa + bs2*(ai[bn] + i); 1392 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1393 } 1394 } 1395 } 1396 1397 if (idx) { 1398 *idx = 0; 1399 if (*nz) { 1400 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1401 for (i=0; i<M; i++) { /* for each block in the block row */ 1402 idx_i = *idx + i*bs; 1403 itmp = bs*aj[ai[bn] + i]; 1404 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1405 } 1406 } 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1413 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1414 { 1415 PetscErrorCode ierr; 1416 1417 PetscFunctionBegin; 1418 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1419 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1425 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1426 { 1427 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1428 Mat C; 1429 PetscErrorCode ierr; 1430 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1431 PetscInt *rows,*cols,bs2=a->bs2; 1432 MatScalar *array; 1433 1434 PetscFunctionBegin; 1435 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1436 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1437 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1438 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1439 1440 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1441 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1442 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1443 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1444 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1445 ierr = PetscFree(col);CHKERRQ(ierr); 1446 } else { 1447 C = *B; 1448 } 1449 1450 array = a->a; 1451 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1452 cols = rows + bs; 1453 for (i=0; i<mbs; i++) { 1454 cols[0] = i*bs; 1455 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1456 len = ai[i+1] - ai[i]; 1457 for (j=0; j<len; j++) { 1458 rows[0] = (*aj++)*bs; 1459 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1460 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1461 array += bs2; 1462 } 1463 } 1464 ierr = PetscFree(rows);CHKERRQ(ierr); 1465 1466 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1467 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1468 1469 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1470 *B = C; 1471 } else { 1472 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1473 } 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1479 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1480 { 1481 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1482 PetscErrorCode ierr; 1483 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1484 int fd; 1485 PetscScalar *aa; 1486 FILE *file; 1487 1488 PetscFunctionBegin; 1489 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1490 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1491 col_lens[0] = MAT_FILE_COOKIE; 1492 1493 col_lens[1] = A->rmap->N; 1494 col_lens[2] = A->cmap->n; 1495 col_lens[3] = a->nz*bs2; 1496 1497 /* store lengths of each row and write (including header) to file */ 1498 count = 0; 1499 for (i=0; i<a->mbs; i++) { 1500 for (j=0; j<bs; j++) { 1501 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1502 } 1503 } 1504 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1505 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1506 1507 /* store column indices (zero start index) */ 1508 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1509 count = 0; 1510 for (i=0; i<a->mbs; i++) { 1511 for (j=0; j<bs; j++) { 1512 for (k=a->i[i]; k<a->i[i+1]; k++) { 1513 for (l=0; l<bs; l++) { 1514 jj[count++] = bs*a->j[k] + l; 1515 } 1516 } 1517 } 1518 } 1519 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1520 ierr = PetscFree(jj);CHKERRQ(ierr); 1521 1522 /* store nonzero values */ 1523 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1524 count = 0; 1525 for (i=0; i<a->mbs; i++) { 1526 for (j=0; j<bs; j++) { 1527 for (k=a->i[i]; k<a->i[i+1]; k++) { 1528 for (l=0; l<bs; l++) { 1529 aa[count++] = a->a[bs2*k + l*bs + j]; 1530 } 1531 } 1532 } 1533 } 1534 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1535 ierr = PetscFree(aa);CHKERRQ(ierr); 1536 1537 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1538 if (file) { 1539 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1540 } 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1546 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1547 { 1548 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1549 PetscErrorCode ierr; 1550 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1551 PetscViewerFormat format; 1552 1553 PetscFunctionBegin; 1554 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1555 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1556 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1557 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1558 Mat aij; 1559 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1560 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1561 ierr = MatDestroy(aij);CHKERRQ(ierr); 1562 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1563 PetscFunctionReturn(0); 1564 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1565 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1566 for (i=0; i<a->mbs; i++) { 1567 for (j=0; j<bs; j++) { 1568 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1569 for (k=a->i[i]; k<a->i[i+1]; k++) { 1570 for (l=0; l<bs; l++) { 1571 #if defined(PETSC_USE_COMPLEX) 1572 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1574 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1575 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1576 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1577 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1578 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1579 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1580 } 1581 #else 1582 if (a->a[bs2*k + l*bs + j] != 0.0) { 1583 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1584 } 1585 #endif 1586 } 1587 } 1588 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1589 } 1590 } 1591 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1592 } else { 1593 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1594 for (i=0; i<a->mbs; i++) { 1595 for (j=0; j<bs; j++) { 1596 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1597 for (k=a->i[i]; k<a->i[i+1]; k++) { 1598 for (l=0; l<bs; l++) { 1599 #if defined(PETSC_USE_COMPLEX) 1600 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1601 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1602 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1603 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1604 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1605 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1606 } else { 1607 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1608 } 1609 #else 1610 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1611 #endif 1612 } 1613 } 1614 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1615 } 1616 } 1617 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1618 } 1619 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1625 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1626 { 1627 Mat A = (Mat) Aa; 1628 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1629 PetscErrorCode ierr; 1630 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1631 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1632 MatScalar *aa; 1633 PetscViewer viewer; 1634 1635 PetscFunctionBegin; 1636 1637 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1638 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1639 1640 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1641 1642 /* loop over matrix elements drawing boxes */ 1643 color = PETSC_DRAW_BLUE; 1644 for (i=0,row=0; i<mbs; i++,row+=bs) { 1645 for (j=a->i[i]; j<a->i[i+1]; j++) { 1646 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1647 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1648 aa = a->a + j*bs2; 1649 for (k=0; k<bs; k++) { 1650 for (l=0; l<bs; l++) { 1651 if (PetscRealPart(*aa++) >= 0.) continue; 1652 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1653 } 1654 } 1655 } 1656 } 1657 color = PETSC_DRAW_CYAN; 1658 for (i=0,row=0; i<mbs; i++,row+=bs) { 1659 for (j=a->i[i]; j<a->i[i+1]; j++) { 1660 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1661 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1662 aa = a->a + j*bs2; 1663 for (k=0; k<bs; k++) { 1664 for (l=0; l<bs; l++) { 1665 if (PetscRealPart(*aa++) != 0.) continue; 1666 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 } 1671 1672 color = PETSC_DRAW_RED; 1673 for (i=0,row=0; i<mbs; i++,row+=bs) { 1674 for (j=a->i[i]; j<a->i[i+1]; j++) { 1675 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1676 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1677 aa = a->a + j*bs2; 1678 for (k=0; k<bs; k++) { 1679 for (l=0; l<bs; l++) { 1680 if (PetscRealPart(*aa++) <= 0.) continue; 1681 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1682 } 1683 } 1684 } 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1691 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1692 { 1693 PetscErrorCode ierr; 1694 PetscReal xl,yl,xr,yr,w,h; 1695 PetscDraw draw; 1696 PetscTruth isnull; 1697 1698 PetscFunctionBegin; 1699 1700 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1701 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1702 1703 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1704 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1705 xr += w; yr += h; xl = -w; yl = -h; 1706 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1707 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1708 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatView_SeqBAIJ" 1714 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1715 { 1716 PetscErrorCode ierr; 1717 PetscTruth iascii,isbinary,isdraw; 1718 1719 PetscFunctionBegin; 1720 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1722 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1723 if (iascii){ 1724 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1725 } else if (isbinary) { 1726 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1727 } else if (isdraw) { 1728 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1729 } else { 1730 Mat B; 1731 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1732 ierr = MatView(B,viewer);CHKERRQ(ierr); 1733 ierr = MatDestroy(B);CHKERRQ(ierr); 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1741 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1742 { 1743 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1744 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1745 PetscInt *ai = a->i,*ailen = a->ilen; 1746 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1747 MatScalar *ap,*aa = a->a; 1748 1749 PetscFunctionBegin; 1750 for (k=0; k<m; k++) { /* loop over rows */ 1751 row = im[k]; brow = row/bs; 1752 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1753 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1754 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1755 nrow = ailen[brow]; 1756 for (l=0; l<n; l++) { /* loop over columns */ 1757 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1758 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1759 col = in[l] ; 1760 bcol = col/bs; 1761 cidx = col%bs; 1762 ridx = row%bs; 1763 high = nrow; 1764 low = 0; /* assume unsorted */ 1765 while (high-low > 5) { 1766 t = (low+high)/2; 1767 if (rp[t] > bcol) high = t; 1768 else low = t; 1769 } 1770 for (i=low; i<high; i++) { 1771 if (rp[i] > bcol) break; 1772 if (rp[i] == bcol) { 1773 *v++ = ap[bs2*i+bs*cidx+ridx]; 1774 goto finished; 1775 } 1776 } 1777 *v++ = 0.0; 1778 finished:; 1779 } 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #define CHUNKSIZE 10 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1787 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1788 { 1789 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1790 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1791 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1792 PetscErrorCode ierr; 1793 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1794 PetscTruth roworiented=a->roworiented; 1795 const PetscScalar *value = v; 1796 MatScalar *ap,*aa = a->a,*bap; 1797 1798 PetscFunctionBegin; 1799 if (roworiented) { 1800 stepval = (n-1)*bs; 1801 } else { 1802 stepval = (m-1)*bs; 1803 } 1804 for (k=0; k<m; k++) { /* loop over added rows */ 1805 row = im[k]; 1806 if (row < 0) continue; 1807 #if defined(PETSC_USE_DEBUG) 1808 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1809 #endif 1810 rp = aj + ai[row]; 1811 ap = aa + bs2*ai[row]; 1812 rmax = imax[row]; 1813 nrow = ailen[row]; 1814 low = 0; 1815 high = nrow; 1816 for (l=0; l<n; l++) { /* loop over added columns */ 1817 if (in[l] < 0) continue; 1818 #if defined(PETSC_USE_DEBUG) 1819 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1820 #endif 1821 col = in[l]; 1822 if (roworiented) { 1823 value = v + k*(stepval+bs)*bs + l*bs; 1824 } else { 1825 value = v + l*(stepval+bs)*bs + k*bs; 1826 } 1827 if (col <= lastcol) low = 0; else high = nrow; 1828 lastcol = col; 1829 while (high-low > 7) { 1830 t = (low+high)/2; 1831 if (rp[t] > col) high = t; 1832 else low = t; 1833 } 1834 for (i=low; i<high; i++) { 1835 if (rp[i] > col) break; 1836 if (rp[i] == col) { 1837 bap = ap + bs2*i; 1838 if (roworiented) { 1839 if (is == ADD_VALUES) { 1840 for (ii=0; ii<bs; ii++,value+=stepval) { 1841 for (jj=ii; jj<bs2; jj+=bs) { 1842 bap[jj] += *value++; 1843 } 1844 } 1845 } else { 1846 for (ii=0; ii<bs; ii++,value+=stepval) { 1847 for (jj=ii; jj<bs2; jj+=bs) { 1848 bap[jj] = *value++; 1849 } 1850 } 1851 } 1852 } else { 1853 if (is == ADD_VALUES) { 1854 for (ii=0; ii<bs; ii++,value+=stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 *bap++ += *value++; 1857 } 1858 } 1859 } else { 1860 for (ii=0; ii<bs; ii++,value+=stepval) { 1861 for (jj=0; jj<bs; jj++) { 1862 *bap++ = *value++; 1863 } 1864 } 1865 } 1866 } 1867 goto noinsert2; 1868 } 1869 } 1870 if (nonew == 1) goto noinsert2; 1871 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1872 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1873 N = nrow++ - 1; high++; 1874 /* shift up all the later entries in this row */ 1875 for (ii=N; ii>=i; ii--) { 1876 rp[ii+1] = rp[ii]; 1877 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1878 } 1879 if (N >= i) { 1880 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1881 } 1882 rp[i] = col; 1883 bap = ap + bs2*i; 1884 if (roworiented) { 1885 for (ii=0; ii<bs; ii++,value+=stepval) { 1886 for (jj=ii; jj<bs2; jj+=bs) { 1887 bap[jj] = *value++; 1888 } 1889 } 1890 } else { 1891 for (ii=0; ii<bs; ii++,value+=stepval) { 1892 for (jj=0; jj<bs; jj++) { 1893 *bap++ = *value++; 1894 } 1895 } 1896 } 1897 noinsert2:; 1898 low = i; 1899 } 1900 ailen[row] = nrow; 1901 } 1902 PetscFunctionReturn(0); 1903 } 1904 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1907 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1908 { 1909 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1910 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1911 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1912 PetscErrorCode ierr; 1913 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1914 MatScalar *aa = a->a,*ap; 1915 PetscReal ratio=0.6; 1916 1917 PetscFunctionBegin; 1918 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1919 1920 if (m) rmax = ailen[0]; 1921 for (i=1; i<mbs; i++) { 1922 /* move each row back by the amount of empty slots (fshift) before it*/ 1923 fshift += imax[i-1] - ailen[i-1]; 1924 rmax = PetscMax(rmax,ailen[i]); 1925 if (fshift) { 1926 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1927 N = ailen[i]; 1928 for (j=0; j<N; j++) { 1929 ip[j-fshift] = ip[j]; 1930 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1931 } 1932 } 1933 ai[i] = ai[i-1] + ailen[i-1]; 1934 } 1935 if (mbs) { 1936 fshift += imax[mbs-1] - ailen[mbs-1]; 1937 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1938 } 1939 /* reset ilen and imax for each row */ 1940 for (i=0; i<mbs; i++) { 1941 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1942 } 1943 a->nz = ai[mbs]; 1944 1945 /* diagonals may have moved, so kill the diagonal pointers */ 1946 a->idiagvalid = PETSC_FALSE; 1947 if (fshift && a->diag) { 1948 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1949 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1950 a->diag = 0; 1951 } 1952 if (fshift && a->nounused == -1) { 1953 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1954 } 1955 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1956 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1957 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1958 a->reallocs = 0; 1959 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1960 1961 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1962 if (a->compressedrow.use){ 1963 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1964 } 1965 1966 A->same_nonzero = PETSC_TRUE; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 /* 1971 This function returns an array of flags which indicate the locations of contiguous 1972 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1973 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1974 Assume: sizes should be long enough to hold all the values. 1975 */ 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1978 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1979 { 1980 PetscInt i,j,k,row; 1981 PetscTruth flg; 1982 1983 PetscFunctionBegin; 1984 for (i=0,j=0; i<n; j++) { 1985 row = idx[i]; 1986 if (row%bs!=0) { /* Not the begining of a block */ 1987 sizes[j] = 1; 1988 i++; 1989 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1990 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1991 i++; 1992 } else { /* Begining of the block, so check if the complete block exists */ 1993 flg = PETSC_TRUE; 1994 for (k=1; k<bs; k++) { 1995 if (row+k != idx[i+k]) { /* break in the block */ 1996 flg = PETSC_FALSE; 1997 break; 1998 } 1999 } 2000 if (flg) { /* No break in the bs */ 2001 sizes[j] = bs; 2002 i+= bs; 2003 } else { 2004 sizes[j] = 1; 2005 i++; 2006 } 2007 } 2008 } 2009 *bs_max = j; 2010 PetscFunctionReturn(0); 2011 } 2012 2013 #undef __FUNCT__ 2014 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2015 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2016 { 2017 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2018 PetscErrorCode ierr; 2019 PetscInt i,j,k,count,*rows; 2020 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2021 PetscScalar zero = 0.0; 2022 MatScalar *aa; 2023 2024 PetscFunctionBegin; 2025 /* Make a copy of the IS and sort it */ 2026 /* allocate memory for rows,sizes */ 2027 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2028 sizes = rows + is_n; 2029 2030 /* copy IS values to rows, and sort them */ 2031 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2032 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2033 if (baij->keepnonzeropattern) { 2034 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2035 bs_max = is_n; 2036 A->same_nonzero = PETSC_TRUE; 2037 } else { 2038 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2039 A->same_nonzero = PETSC_FALSE; 2040 } 2041 2042 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2043 row = rows[j]; 2044 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2045 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2046 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2047 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2048 if (diag != 0.0) { 2049 if (baij->ilen[row/bs] > 0) { 2050 baij->ilen[row/bs] = 1; 2051 baij->j[baij->i[row/bs]] = row/bs; 2052 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2053 } 2054 /* Now insert all the diagonal values for this bs */ 2055 for (k=0; k<bs; k++) { 2056 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2057 } 2058 } else { /* (diag == 0.0) */ 2059 baij->ilen[row/bs] = 0; 2060 } /* end (diag == 0.0) */ 2061 } else { /* (sizes[i] != bs) */ 2062 #if defined (PETSC_USE_DEBUG) 2063 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2064 #endif 2065 for (k=0; k<count; k++) { 2066 aa[0] = zero; 2067 aa += bs; 2068 } 2069 if (diag != 0.0) { 2070 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2071 } 2072 } 2073 } 2074 2075 ierr = PetscFree(rows);CHKERRQ(ierr); 2076 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 #undef __FUNCT__ 2081 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2082 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2083 { 2084 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2085 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2086 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2087 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2088 PetscErrorCode ierr; 2089 PetscInt ridx,cidx,bs2=a->bs2; 2090 PetscTruth roworiented=a->roworiented; 2091 MatScalar *ap,value,*aa=a->a,*bap; 2092 2093 PetscFunctionBegin; 2094 for (k=0; k<m; k++) { /* loop over added rows */ 2095 row = im[k]; 2096 brow = row/bs; 2097 if (row < 0) continue; 2098 #if defined(PETSC_USE_DEBUG) 2099 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2100 #endif 2101 rp = aj + ai[brow]; 2102 ap = aa + bs2*ai[brow]; 2103 rmax = imax[brow]; 2104 nrow = ailen[brow]; 2105 low = 0; 2106 high = nrow; 2107 for (l=0; l<n; l++) { /* loop over added columns */ 2108 if (in[l] < 0) continue; 2109 #if defined(PETSC_USE_DEBUG) 2110 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2111 #endif 2112 col = in[l]; bcol = col/bs; 2113 ridx = row % bs; cidx = col % bs; 2114 if (roworiented) { 2115 value = v[l + k*n]; 2116 } else { 2117 value = v[k + l*m]; 2118 } 2119 if (col <= lastcol) low = 0; else high = nrow; 2120 lastcol = col; 2121 while (high-low > 7) { 2122 t = (low+high)/2; 2123 if (rp[t] > bcol) high = t; 2124 else low = t; 2125 } 2126 for (i=low; i<high; i++) { 2127 if (rp[i] > bcol) break; 2128 if (rp[i] == bcol) { 2129 bap = ap + bs2*i + bs*cidx + ridx; 2130 if (is == ADD_VALUES) *bap += value; 2131 else *bap = value; 2132 goto noinsert1; 2133 } 2134 } 2135 if (nonew == 1) goto noinsert1; 2136 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2138 N = nrow++ - 1; high++; 2139 /* shift up all the later entries in this row */ 2140 for (ii=N; ii>=i; ii--) { 2141 rp[ii+1] = rp[ii]; 2142 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2143 } 2144 if (N>=i) { 2145 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2146 } 2147 rp[i] = bcol; 2148 ap[bs2*i + bs*cidx + ridx] = value; 2149 a->nz++; 2150 noinsert1:; 2151 low = i; 2152 } 2153 ailen[brow] = nrow; 2154 } 2155 A->same_nonzero = PETSC_FALSE; 2156 PetscFunctionReturn(0); 2157 } 2158 2159 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2163 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2164 { 2165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2166 Mat outA; 2167 PetscErrorCode ierr; 2168 PetscTruth row_identity,col_identity; 2169 2170 PetscFunctionBegin; 2171 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2172 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2173 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2174 if (!row_identity || !col_identity) { 2175 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2176 } 2177 2178 outA = inA; 2179 inA->factor = MAT_FACTOR_LU; 2180 2181 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2182 2183 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2184 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2185 a->row = row; 2186 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2187 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2188 a->col = col; 2189 2190 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2191 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2192 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2193 2194 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2195 if (!a->solve_work) { 2196 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2197 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2198 } 2199 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2200 2201 PetscFunctionReturn(0); 2202 } 2203 2204 EXTERN_C_BEGIN 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2207 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2208 { 2209 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2210 PetscInt i,nz,mbs; 2211 2212 PetscFunctionBegin; 2213 nz = baij->maxnz/baij->bs2; 2214 mbs = baij->mbs; 2215 for (i=0; i<nz; i++) { 2216 baij->j[i] = indices[i]; 2217 } 2218 baij->nz = nz; 2219 for (i=0; i<mbs; i++) { 2220 baij->ilen[i] = baij->imax[i]; 2221 } 2222 PetscFunctionReturn(0); 2223 } 2224 EXTERN_C_END 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2228 /*@ 2229 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2230 in the matrix. 2231 2232 Input Parameters: 2233 + mat - the SeqBAIJ matrix 2234 - indices - the column indices 2235 2236 Level: advanced 2237 2238 Notes: 2239 This can be called if you have precomputed the nonzero structure of the 2240 matrix and want to provide it to the matrix object to improve the performance 2241 of the MatSetValues() operation. 2242 2243 You MUST have set the correct numbers of nonzeros per row in the call to 2244 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2245 2246 MUST be called before any calls to MatSetValues(); 2247 2248 @*/ 2249 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2250 { 2251 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2252 2253 PetscFunctionBegin; 2254 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2255 PetscValidPointer(indices,2); 2256 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2257 if (f) { 2258 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2259 } else { 2260 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2261 } 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2267 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2268 { 2269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2272 PetscReal atmp; 2273 PetscScalar *x,zero = 0.0; 2274 MatScalar *aa; 2275 PetscInt ncols,brow,krow,kcol; 2276 2277 PetscFunctionBegin; 2278 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2279 bs = A->rmap->bs; 2280 aa = a->a; 2281 ai = a->i; 2282 aj = a->j; 2283 mbs = a->mbs; 2284 2285 ierr = VecSet(v,zero);CHKERRQ(ierr); 2286 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2287 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2288 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2289 for (i=0; i<mbs; i++) { 2290 ncols = ai[1] - ai[0]; ai++; 2291 brow = bs*i; 2292 for (j=0; j<ncols; j++){ 2293 for (kcol=0; kcol<bs; kcol++){ 2294 for (krow=0; krow<bs; krow++){ 2295 atmp = PetscAbsScalar(*aa);aa++; 2296 row = brow + krow; /* row index */ 2297 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2298 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2299 } 2300 } 2301 aj++; 2302 } 2303 } 2304 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #undef __FUNCT__ 2309 #define __FUNCT__ "MatCopy_SeqBAIJ" 2310 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2311 { 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 /* If the two matrices have the same copy implementation, use fast copy. */ 2316 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2318 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2319 2320 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2321 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2322 } 2323 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2324 } else { 2325 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2326 } 2327 PetscFunctionReturn(0); 2328 } 2329 2330 #undef __FUNCT__ 2331 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2332 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2333 { 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2343 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2344 { 2345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2346 PetscFunctionBegin; 2347 *array = a->a; 2348 PetscFunctionReturn(0); 2349 } 2350 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2353 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2354 { 2355 PetscFunctionBegin; 2356 PetscFunctionReturn(0); 2357 } 2358 2359 #include "petscblaslapack.h" 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2362 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2363 { 2364 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2365 PetscErrorCode ierr; 2366 PetscInt i,bs=Y->rmap->bs,j,bs2; 2367 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2368 2369 PetscFunctionBegin; 2370 if (str == SAME_NONZERO_PATTERN) { 2371 PetscScalar alpha = a; 2372 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2373 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2374 if (y->xtoy && y->XtoY != X) { 2375 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2376 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2377 } 2378 if (!y->xtoy) { /* get xtoy */ 2379 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2380 y->XtoY = X; 2381 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2382 } 2383 bs2 = bs*bs; 2384 for (i=0; i<x->nz; i++) { 2385 j = 0; 2386 while (j < bs2){ 2387 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2388 j++; 2389 } 2390 } 2391 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2392 } else { 2393 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2394 } 2395 PetscFunctionReturn(0); 2396 } 2397 2398 #undef __FUNCT__ 2399 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2400 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2401 { 2402 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2403 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2404 MatScalar *aa = a->a; 2405 2406 PetscFunctionBegin; 2407 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2413 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2414 { 2415 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2416 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2417 MatScalar *aa = a->a; 2418 2419 PetscFunctionBegin; 2420 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2428 /* 2429 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2430 */ 2431 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2432 { 2433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2434 PetscErrorCode ierr; 2435 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2436 PetscInt nz = a->i[m],row,*jj,mr,col; 2437 2438 PetscFunctionBegin; 2439 *nn = n; 2440 if (!ia) PetscFunctionReturn(0); 2441 if (symmetric) { 2442 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2443 } else { 2444 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2445 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2446 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2447 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2448 jj = a->j; 2449 for (i=0; i<nz; i++) { 2450 collengths[jj[i]]++; 2451 } 2452 cia[0] = oshift; 2453 for (i=0; i<n; i++) { 2454 cia[i+1] = cia[i] + collengths[i]; 2455 } 2456 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2457 jj = a->j; 2458 for (row=0; row<m; row++) { 2459 mr = a->i[row+1] - a->i[row]; 2460 for (i=0; i<mr; i++) { 2461 col = *jj++; 2462 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2463 } 2464 } 2465 ierr = PetscFree(collengths);CHKERRQ(ierr); 2466 *ia = cia; *ja = cja; 2467 } 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2473 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2474 { 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 if (!ia) PetscFunctionReturn(0); 2479 ierr = PetscFree(*ia);CHKERRQ(ierr); 2480 ierr = PetscFree(*ja);CHKERRQ(ierr); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2486 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2487 { 2488 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2489 PetscErrorCode ierr; 2490 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2491 PetscScalar dx,*y,*xx,*w3_array; 2492 PetscScalar *vscale_array; 2493 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2494 Vec w1=coloring->w1,w2=coloring->w2,w3; 2495 void *fctx = coloring->fctx; 2496 PetscTruth flg = PETSC_FALSE; 2497 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2498 Vec x1_tmp; 2499 2500 PetscFunctionBegin; 2501 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2502 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2503 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2504 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2505 2506 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2507 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2508 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2509 if (flg) { 2510 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2511 } else { 2512 PetscTruth assembled; 2513 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2514 if (assembled) { 2515 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2516 } 2517 } 2518 2519 x1_tmp = x1; 2520 if (!coloring->vscale){ 2521 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2522 } 2523 2524 /* 2525 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2526 coloring->F for the coarser grids from the finest 2527 */ 2528 if (coloring->F) { 2529 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2530 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2531 if (m1 != m2) { 2532 coloring->F = 0; 2533 } 2534 } 2535 2536 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2537 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2538 } 2539 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2540 2541 /* Set w1 = F(x1) */ 2542 if (coloring->F) { 2543 w1 = coloring->F; /* use already computed value of function */ 2544 coloring->F = 0; 2545 } else { 2546 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2547 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2548 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2549 } 2550 2551 if (!coloring->w3) { 2552 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2553 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2554 } 2555 w3 = coloring->w3; 2556 2557 CHKMEMQ; 2558 /* Compute all the local scale factors, including ghost points */ 2559 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2560 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2561 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2562 if (ctype == IS_COLORING_GHOSTED){ 2563 col_start = 0; col_end = N; 2564 } else if (ctype == IS_COLORING_GLOBAL){ 2565 xx = xx - start; 2566 vscale_array = vscale_array - start; 2567 col_start = start; col_end = N + start; 2568 } CHKMEMQ; 2569 for (col=col_start; col<col_end; col++){ 2570 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2571 if (coloring->htype[0] == 'w') { 2572 dx = 1.0 + unorm; 2573 } else { 2574 dx = xx[col]; 2575 } 2576 if (dx == 0.0) dx = 1.0; 2577 #if !defined(PETSC_USE_COMPLEX) 2578 if (dx < umin && dx >= 0.0) dx = umin; 2579 else if (dx < 0.0 && dx > -umin) dx = -umin; 2580 #else 2581 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2582 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2583 #endif 2584 dx *= epsilon; 2585 vscale_array[col] = 1.0/dx; 2586 } CHKMEMQ; 2587 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2588 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2589 if (ctype == IS_COLORING_GLOBAL){ 2590 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2591 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2592 } 2593 CHKMEMQ; 2594 if (coloring->vscaleforrow) { 2595 vscaleforrow = coloring->vscaleforrow; 2596 } else { 2597 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2598 } 2599 2600 2601 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2602 /* 2603 Loop over each color 2604 */ 2605 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2606 for (k=0; k<coloring->ncolors; k++) { 2607 coloring->currentcolor = k; 2608 for (i=0; i<bs; i++) { 2609 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2610 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2611 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2612 /* 2613 Loop over each column associated with color 2614 adding the perturbation to the vector w3. 2615 */ 2616 for (l=0; l<coloring->ncolumns[k]; l++) { 2617 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2618 if (coloring->htype[0] == 'w') { 2619 dx = 1.0 + unorm; 2620 } else { 2621 dx = xx[col]; 2622 } 2623 if (dx == 0.0) dx = 1.0; 2624 #if !defined(PETSC_USE_COMPLEX) 2625 if (dx < umin && dx >= 0.0) dx = umin; 2626 else if (dx < 0.0 && dx > -umin) dx = -umin; 2627 #else 2628 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2629 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2630 #endif 2631 dx *= epsilon; 2632 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2633 w3_array[col] += dx; 2634 } 2635 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2636 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2637 2638 /* 2639 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2640 w2 = F(x1 + dx) - F(x1) 2641 */ 2642 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2643 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2644 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2645 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2646 2647 /* 2648 Loop over rows of vector, putting results into Jacobian matrix 2649 */ 2650 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2651 for (l=0; l<coloring->nrows[k]; l++) { 2652 row = bs*coloring->rows[k][l]; /* local row index */ 2653 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2654 for (j=0; j<bs; j++) { 2655 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2656 srows[j] = row + start + j; 2657 } 2658 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2659 } 2660 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2661 } 2662 } /* endof for each color */ 2663 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2664 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2665 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2666 ierr = PetscFree(srows);CHKERRQ(ierr); 2667 2668 coloring->currentcolor = -1; 2669 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2670 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2671 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 /* -------------------------------------------------------------------*/ 2676 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2677 MatGetRow_SeqBAIJ, 2678 MatRestoreRow_SeqBAIJ, 2679 MatMult_SeqBAIJ_N, 2680 /* 4*/ MatMultAdd_SeqBAIJ_N, 2681 MatMultTranspose_SeqBAIJ, 2682 MatMultTransposeAdd_SeqBAIJ, 2683 0, 2684 0, 2685 0, 2686 /*10*/ 0, 2687 MatLUFactor_SeqBAIJ, 2688 0, 2689 0, 2690 MatTranspose_SeqBAIJ, 2691 /*15*/ MatGetInfo_SeqBAIJ, 2692 MatEqual_SeqBAIJ, 2693 MatGetDiagonal_SeqBAIJ, 2694 MatDiagonalScale_SeqBAIJ, 2695 MatNorm_SeqBAIJ, 2696 /*20*/ 0, 2697 MatAssemblyEnd_SeqBAIJ, 2698 MatSetOption_SeqBAIJ, 2699 MatZeroEntries_SeqBAIJ, 2700 /*24*/ MatZeroRows_SeqBAIJ, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2706 0, 2707 0, 2708 MatGetArray_SeqBAIJ, 2709 MatRestoreArray_SeqBAIJ, 2710 /*34*/ MatDuplicate_SeqBAIJ, 2711 0, 2712 0, 2713 MatILUFactor_SeqBAIJ, 2714 0, 2715 /*39*/ MatAXPY_SeqBAIJ, 2716 MatGetSubMatrices_SeqBAIJ, 2717 MatIncreaseOverlap_SeqBAIJ, 2718 MatGetValues_SeqBAIJ, 2719 MatCopy_SeqBAIJ, 2720 /*44*/ 0, 2721 MatScale_SeqBAIJ, 2722 0, 2723 0, 2724 MatILUDTFactor_SeqBAIJ, 2725 /*49*/ 0, 2726 MatGetRowIJ_SeqBAIJ, 2727 MatRestoreRowIJ_SeqBAIJ, 2728 MatGetColumnIJ_SeqBAIJ, 2729 MatRestoreColumnIJ_SeqBAIJ, 2730 /*54*/ MatFDColoringCreate_SeqAIJ, 2731 0, 2732 0, 2733 0, 2734 MatSetValuesBlocked_SeqBAIJ, 2735 /*59*/ MatGetSubMatrix_SeqBAIJ, 2736 MatDestroy_SeqBAIJ, 2737 MatView_SeqBAIJ, 2738 0, 2739 0, 2740 /*64*/ 0, 2741 0, 2742 0, 2743 0, 2744 0, 2745 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2746 0, 2747 MatConvert_Basic, 2748 0, 2749 0, 2750 /*74*/ 0, 2751 MatFDColoringApply_BAIJ, 2752 0, 2753 0, 2754 0, 2755 /*79*/ 0, 2756 0, 2757 0, 2758 0, 2759 MatLoad_SeqBAIJ, 2760 /*84*/ 0, 2761 0, 2762 0, 2763 0, 2764 0, 2765 /*89*/ 0, 2766 0, 2767 0, 2768 0, 2769 0, 2770 /*94*/ 0, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*99*/0, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*104*/0, 2781 MatRealPart_SeqBAIJ, 2782 MatImaginaryPart_SeqBAIJ, 2783 0, 2784 0, 2785 /*109*/0, 2786 0, 2787 0, 2788 0, 2789 MatMissingDiagonal_SeqBAIJ 2790 /*114*/ 2791 }; 2792 2793 EXTERN_C_BEGIN 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2796 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2797 { 2798 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2799 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2800 PetscErrorCode ierr; 2801 2802 PetscFunctionBegin; 2803 if (aij->nonew != 1) { 2804 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2805 } 2806 2807 /* allocate space for values if not already there */ 2808 if (!aij->saved_values) { 2809 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2810 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2811 } 2812 2813 /* copy values over */ 2814 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2815 PetscFunctionReturn(0); 2816 } 2817 EXTERN_C_END 2818 2819 EXTERN_C_BEGIN 2820 #undef __FUNCT__ 2821 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2822 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2823 { 2824 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2825 PetscErrorCode ierr; 2826 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2827 2828 PetscFunctionBegin; 2829 if (aij->nonew != 1) { 2830 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2831 } 2832 if (!aij->saved_values) { 2833 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2834 } 2835 2836 /* copy values over */ 2837 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2838 PetscFunctionReturn(0); 2839 } 2840 EXTERN_C_END 2841 2842 EXTERN_C_BEGIN 2843 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2844 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2845 EXTERN_C_END 2846 2847 EXTERN_C_BEGIN 2848 #undef __FUNCT__ 2849 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2850 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2851 { 2852 Mat_SeqBAIJ *b; 2853 PetscErrorCode ierr; 2854 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2855 PetscTruth flg,skipallocation = PETSC_FALSE; 2856 2857 PetscFunctionBegin; 2858 2859 if (nz == MAT_SKIP_ALLOCATION) { 2860 skipallocation = PETSC_TRUE; 2861 nz = 0; 2862 } 2863 2864 if (bs < 0) { 2865 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2866 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2867 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2868 bs = PetscAbs(bs); 2869 } 2870 if (nnz && newbs != bs) { 2871 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2872 } 2873 bs = newbs; 2874 2875 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2876 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2877 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2878 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2879 2880 B->preallocated = PETSC_TRUE; 2881 2882 mbs = B->rmap->n/bs; 2883 nbs = B->cmap->n/bs; 2884 bs2 = bs*bs; 2885 2886 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2887 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2888 } 2889 2890 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2891 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2892 if (nnz) { 2893 for (i=0; i<mbs; i++) { 2894 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2895 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2896 } 2897 } 2898 2899 b = (Mat_SeqBAIJ*)B->data; 2900 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2901 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2902 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2903 2904 if (!flg) { 2905 switch (bs) { 2906 case 1: 2907 B->ops->mult = MatMult_SeqBAIJ_1; 2908 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2909 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2910 break; 2911 case 2: 2912 B->ops->mult = MatMult_SeqBAIJ_2; 2913 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2914 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2915 break; 2916 case 3: 2917 B->ops->mult = MatMult_SeqBAIJ_3; 2918 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2919 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2920 break; 2921 case 4: 2922 B->ops->mult = MatMult_SeqBAIJ_4; 2923 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2924 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2925 break; 2926 case 5: 2927 B->ops->mult = MatMult_SeqBAIJ_5; 2928 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2929 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2930 break; 2931 case 6: 2932 B->ops->mult = MatMult_SeqBAIJ_6; 2933 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2934 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2935 break; 2936 case 7: 2937 B->ops->mult = MatMult_SeqBAIJ_7; 2938 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2939 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2940 break; 2941 default: 2942 B->ops->mult = MatMult_SeqBAIJ_N; 2943 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2944 break; 2945 } 2946 } 2947 B->rmap->bs = bs; 2948 b->mbs = mbs; 2949 b->nbs = nbs; 2950 if (!skipallocation) { 2951 if (!b->imax) { 2952 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2953 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2954 b->free_imax_ilen = PETSC_TRUE; 2955 } 2956 /* b->ilen will count nonzeros in each block row so far. */ 2957 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2958 if (!nnz) { 2959 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2960 else if (nz <= 0) nz = 1; 2961 for (i=0; i<mbs; i++) b->imax[i] = nz; 2962 nz = nz*mbs; 2963 } else { 2964 nz = 0; 2965 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2966 } 2967 2968 /* allocate the matrix space */ 2969 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2970 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2971 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2972 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2973 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2974 b->singlemalloc = PETSC_TRUE; 2975 b->i[0] = 0; 2976 for (i=1; i<mbs+1; i++) { 2977 b->i[i] = b->i[i-1] + b->imax[i-1]; 2978 } 2979 b->free_a = PETSC_TRUE; 2980 b->free_ij = PETSC_TRUE; 2981 } else { 2982 b->free_a = PETSC_FALSE; 2983 b->free_ij = PETSC_FALSE; 2984 } 2985 2986 B->rmap->bs = bs; 2987 b->bs2 = bs2; 2988 b->mbs = mbs; 2989 b->nz = 0; 2990 b->maxnz = nz*bs2; 2991 B->info.nz_unneeded = (PetscReal)b->maxnz; 2992 PetscFunctionReturn(0); 2993 } 2994 EXTERN_C_END 2995 2996 EXTERN_C_BEGIN 2997 #undef __FUNCT__ 2998 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2999 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3000 { 3001 PetscInt i,m,nz,nz_max=0,*nnz; 3002 PetscScalar *values=0; 3003 PetscErrorCode ierr; 3004 3005 PetscFunctionBegin; 3006 3007 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3008 3009 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3010 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3011 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3012 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3013 m = B->rmap->n/bs; 3014 3015 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3016 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3017 for(i=0; i<m; i++) { 3018 nz = ii[i+1]- ii[i]; 3019 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3020 nz_max = PetscMax(nz_max, nz); 3021 nnz[i] = nz; 3022 } 3023 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3024 ierr = PetscFree(nnz);CHKERRQ(ierr); 3025 3026 values = (PetscScalar*)V; 3027 if (!values) { 3028 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3029 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3030 } 3031 for (i=0; i<m; i++) { 3032 PetscInt ncols = ii[i+1] - ii[i]; 3033 const PetscInt *icols = jj + ii[i]; 3034 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3035 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3036 } 3037 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3038 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 3041 PetscFunctionReturn(0); 3042 } 3043 EXTERN_C_END 3044 3045 3046 EXTERN_C_BEGIN 3047 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3048 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3049 EXTERN_C_END 3050 3051 /*MC 3052 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3053 block sparse compressed row format. 3054 3055 Options Database Keys: 3056 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3057 3058 Level: beginner 3059 3060 .seealso: MatCreateSeqBAIJ() 3061 M*/ 3062 3063 3064 EXTERN_C_BEGIN 3065 #undef __FUNCT__ 3066 #define __FUNCT__ "MatCreate_SeqBAIJ" 3067 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3068 { 3069 PetscErrorCode ierr; 3070 PetscMPIInt size; 3071 Mat_SeqBAIJ *b; 3072 3073 PetscFunctionBegin; 3074 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3075 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3076 3077 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3078 B->data = (void*)b; 3079 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3080 B->mapping = 0; 3081 b->row = 0; 3082 b->col = 0; 3083 b->icol = 0; 3084 b->reallocs = 0; 3085 b->saved_values = 0; 3086 3087 b->roworiented = PETSC_TRUE; 3088 b->nonew = 0; 3089 b->diag = 0; 3090 b->solve_work = 0; 3091 b->mult_work = 0; 3092 B->spptr = 0; 3093 B->info.nz_unneeded = (PetscReal)b->maxnz; 3094 b->keepnonzeropattern = PETSC_FALSE; 3095 b->xtoy = 0; 3096 b->XtoY = 0; 3097 b->compressedrow.use = PETSC_FALSE; 3098 b->compressedrow.nrows = 0; 3099 b->compressedrow.i = PETSC_NULL; 3100 b->compressedrow.rindex = PETSC_NULL; 3101 b->compressedrow.checked = PETSC_FALSE; 3102 B->same_nonzero = PETSC_FALSE; 3103 3104 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 3105 "MatGetFactorAvailable_seqbaij_petsc", 3106 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 3108 "MatGetFactor_seqbaij_petsc", 3109 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3110 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3111 "MatInvertBlockDiagonal_SeqBAIJ", 3112 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3113 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3114 "MatStoreValues_SeqBAIJ", 3115 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3116 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3117 "MatRetrieveValues_SeqBAIJ", 3118 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3119 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3120 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3121 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3122 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3123 "MatConvert_SeqBAIJ_SeqAIJ", 3124 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3126 "MatConvert_SeqBAIJ_SeqSBAIJ", 3127 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3129 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3130 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3132 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3133 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3134 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3135 PetscFunctionReturn(0); 3136 } 3137 EXTERN_C_END 3138 3139 #undef __FUNCT__ 3140 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3141 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3142 { 3143 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3144 PetscErrorCode ierr; 3145 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3146 3147 PetscFunctionBegin; 3148 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3149 3150 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3151 c->imax = a->imax; 3152 c->ilen = a->ilen; 3153 c->free_imax_ilen = PETSC_FALSE; 3154 } else { 3155 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3156 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3157 for (i=0; i<mbs; i++) { 3158 c->imax[i] = a->imax[i]; 3159 c->ilen[i] = a->ilen[i]; 3160 } 3161 c->free_imax_ilen = PETSC_TRUE; 3162 } 3163 3164 /* allocate the matrix space */ 3165 if (mallocmatspace){ 3166 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3167 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3168 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3169 c->singlemalloc = PETSC_FALSE; 3170 c->free_ij = PETSC_FALSE; 3171 c->i = a->i; 3172 c->j = a->j; 3173 c->parent = A; 3174 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3175 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3176 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3177 } else { 3178 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3179 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3180 c->singlemalloc = PETSC_TRUE; 3181 c->free_ij = PETSC_TRUE; 3182 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3183 if (mbs > 0) { 3184 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3185 if (cpvalues == MAT_COPY_VALUES) { 3186 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3187 } else { 3188 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3189 } 3190 } 3191 } 3192 } 3193 3194 c->roworiented = a->roworiented; 3195 c->nonew = a->nonew; 3196 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 3197 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 3198 c->bs2 = a->bs2; 3199 c->mbs = a->mbs; 3200 c->nbs = a->nbs; 3201 3202 if (a->diag) { 3203 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3204 c->diag = a->diag; 3205 c->free_diag = PETSC_FALSE; 3206 } else { 3207 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3208 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3209 for (i=0; i<mbs; i++) { 3210 c->diag[i] = a->diag[i]; 3211 } 3212 c->free_diag = PETSC_TRUE; 3213 } 3214 } else c->diag = 0; 3215 c->nz = a->nz; 3216 c->maxnz = a->maxnz; 3217 c->solve_work = 0; 3218 c->mult_work = 0; 3219 c->free_a = PETSC_TRUE; 3220 c->free_ij = PETSC_TRUE; 3221 C->preallocated = PETSC_TRUE; 3222 C->assembled = PETSC_TRUE; 3223 3224 c->compressedrow.use = a->compressedrow.use; 3225 c->compressedrow.nrows = a->compressedrow.nrows; 3226 c->compressedrow.checked = a->compressedrow.checked; 3227 if ( a->compressedrow.checked && a->compressedrow.use){ 3228 i = a->compressedrow.nrows; 3229 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 3230 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3231 c->compressedrow.rindex = c->compressedrow.i + i + 1; 3232 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3233 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3234 } else { 3235 c->compressedrow.use = PETSC_FALSE; 3236 c->compressedrow.i = PETSC_NULL; 3237 c->compressedrow.rindex = PETSC_NULL; 3238 } 3239 C->same_nonzero = A->same_nonzero; 3240 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3241 PetscFunctionReturn(0); 3242 } 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3246 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3247 { 3248 PetscErrorCode ierr; 3249 3250 PetscFunctionBegin; 3251 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3252 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3253 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3254 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3255 PetscFunctionReturn(0); 3256 } 3257 3258 #undef __FUNCT__ 3259 #define __FUNCT__ "MatLoad_SeqBAIJ" 3260 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3261 { 3262 Mat_SeqBAIJ *a; 3263 Mat B; 3264 PetscErrorCode ierr; 3265 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3266 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3267 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3268 PetscInt *masked,nmask,tmp,bs2,ishift; 3269 PetscMPIInt size; 3270 int fd; 3271 PetscScalar *aa; 3272 MPI_Comm comm = ((PetscObject)viewer)->comm; 3273 3274 PetscFunctionBegin; 3275 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3276 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3277 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3278 bs2 = bs*bs; 3279 3280 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3281 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3282 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3283 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3284 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3285 M = header[1]; N = header[2]; nz = header[3]; 3286 3287 if (header[3] < 0) { 3288 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3289 } 3290 3291 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3292 3293 /* 3294 This code adds extra rows to make sure the number of rows is 3295 divisible by the blocksize 3296 */ 3297 mbs = M/bs; 3298 extra_rows = bs - M + bs*(mbs); 3299 if (extra_rows == bs) extra_rows = 0; 3300 else mbs++; 3301 if (extra_rows) { 3302 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3303 } 3304 3305 /* read in row lengths */ 3306 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3307 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3308 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3309 3310 /* read in column indices */ 3311 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3312 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3313 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3314 3315 /* loop over row lengths determining block row lengths */ 3316 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3317 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3318 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3319 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3320 masked = mask + mbs; 3321 rowcount = 0; nzcount = 0; 3322 for (i=0; i<mbs; i++) { 3323 nmask = 0; 3324 for (j=0; j<bs; j++) { 3325 kmax = rowlengths[rowcount]; 3326 for (k=0; k<kmax; k++) { 3327 tmp = jj[nzcount++]/bs; 3328 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3329 } 3330 rowcount++; 3331 } 3332 browlengths[i] += nmask; 3333 /* zero out the mask elements we set */ 3334 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3335 } 3336 3337 /* create our matrix */ 3338 ierr = MatCreate(comm,&B); 3339 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3340 ierr = MatSetType(B,type);CHKERRQ(ierr); 3341 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3342 a = (Mat_SeqBAIJ*)B->data; 3343 3344 /* set matrix "i" values */ 3345 a->i[0] = 0; 3346 for (i=1; i<= mbs; i++) { 3347 a->i[i] = a->i[i-1] + browlengths[i-1]; 3348 a->ilen[i-1] = browlengths[i-1]; 3349 } 3350 a->nz = 0; 3351 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3352 3353 /* read in nonzero values */ 3354 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3355 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3356 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3357 3358 /* set "a" and "j" values into matrix */ 3359 nzcount = 0; jcount = 0; 3360 for (i=0; i<mbs; i++) { 3361 nzcountb = nzcount; 3362 nmask = 0; 3363 for (j=0; j<bs; j++) { 3364 kmax = rowlengths[i*bs+j]; 3365 for (k=0; k<kmax; k++) { 3366 tmp = jj[nzcount++]/bs; 3367 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3368 } 3369 } 3370 /* sort the masked values */ 3371 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3372 3373 /* set "j" values into matrix */ 3374 maskcount = 1; 3375 for (j=0; j<nmask; j++) { 3376 a->j[jcount++] = masked[j]; 3377 mask[masked[j]] = maskcount++; 3378 } 3379 /* set "a" values into matrix */ 3380 ishift = bs2*a->i[i]; 3381 for (j=0; j<bs; j++) { 3382 kmax = rowlengths[i*bs+j]; 3383 for (k=0; k<kmax; k++) { 3384 tmp = jj[nzcountb]/bs ; 3385 block = mask[tmp] - 1; 3386 point = jj[nzcountb] - bs*tmp; 3387 idx = ishift + bs2*block + j + bs*point; 3388 a->a[idx] = (MatScalar)aa[nzcountb++]; 3389 } 3390 } 3391 /* zero out the mask elements we set */ 3392 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3393 } 3394 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3395 3396 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3397 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3398 ierr = PetscFree(aa);CHKERRQ(ierr); 3399 ierr = PetscFree(jj);CHKERRQ(ierr); 3400 ierr = PetscFree(mask);CHKERRQ(ierr); 3401 3402 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3403 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3404 ierr = MatView_Private(B);CHKERRQ(ierr); 3405 3406 *A = B; 3407 PetscFunctionReturn(0); 3408 } 3409 3410 #undef __FUNCT__ 3411 #define __FUNCT__ "MatCreateSeqBAIJ" 3412 /*@C 3413 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3414 compressed row) format. For good matrix assembly performance the 3415 user should preallocate the matrix storage by setting the parameter nz 3416 (or the array nnz). By setting these parameters accurately, performance 3417 during matrix assembly can be increased by more than a factor of 50. 3418 3419 Collective on MPI_Comm 3420 3421 Input Parameters: 3422 + comm - MPI communicator, set to PETSC_COMM_SELF 3423 . bs - size of block 3424 . m - number of rows 3425 . n - number of columns 3426 . nz - number of nonzero blocks per block row (same for all rows) 3427 - nnz - array containing the number of nonzero blocks in the various block rows 3428 (possibly different for each block row) or PETSC_NULL 3429 3430 Output Parameter: 3431 . A - the matrix 3432 3433 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3434 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3435 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3436 3437 Options Database Keys: 3438 . -mat_no_unroll - uses code that does not unroll the loops in the 3439 block calculations (much slower) 3440 . -mat_block_size - size of the blocks to use 3441 3442 Level: intermediate 3443 3444 Notes: 3445 The number of rows and columns must be divisible by blocksize. 3446 3447 If the nnz parameter is given then the nz parameter is ignored 3448 3449 A nonzero block is any block that as 1 or more nonzeros in it 3450 3451 The block AIJ format is fully compatible with standard Fortran 77 3452 storage. That is, the stored row and column indices can begin at 3453 either one (as in Fortran) or zero. See the users' manual for details. 3454 3455 Specify the preallocated storage with either nz or nnz (not both). 3456 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3457 allocation. For additional details, see the users manual chapter on 3458 matrices. 3459 3460 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3461 @*/ 3462 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3463 { 3464 PetscErrorCode ierr; 3465 3466 PetscFunctionBegin; 3467 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3468 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3469 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3470 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3471 PetscFunctionReturn(0); 3472 } 3473 3474 #undef __FUNCT__ 3475 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3476 /*@C 3477 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3478 per row in the matrix. For good matrix assembly performance the 3479 user should preallocate the matrix storage by setting the parameter nz 3480 (or the array nnz). By setting these parameters accurately, performance 3481 during matrix assembly can be increased by more than a factor of 50. 3482 3483 Collective on MPI_Comm 3484 3485 Input Parameters: 3486 + A - the matrix 3487 . bs - size of block 3488 . nz - number of block nonzeros per block row (same for all rows) 3489 - nnz - array containing the number of block nonzeros in the various block rows 3490 (possibly different for each block row) or PETSC_NULL 3491 3492 Options Database Keys: 3493 . -mat_no_unroll - uses code that does not unroll the loops in the 3494 block calculations (much slower) 3495 . -mat_block_size - size of the blocks to use 3496 3497 Level: intermediate 3498 3499 Notes: 3500 If the nnz parameter is given then the nz parameter is ignored 3501 3502 You can call MatGetInfo() to get information on how effective the preallocation was; 3503 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3504 You can also run with the option -info and look for messages with the string 3505 malloc in them to see if additional memory allocation was needed. 3506 3507 The block AIJ format is fully compatible with standard Fortran 77 3508 storage. That is, the stored row and column indices can begin at 3509 either one (as in Fortran) or zero. See the users' manual for details. 3510 3511 Specify the preallocated storage with either nz or nnz (not both). 3512 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3513 allocation. For additional details, see the users manual chapter on 3514 matrices. 3515 3516 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3517 @*/ 3518 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3519 { 3520 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3521 3522 PetscFunctionBegin; 3523 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3524 if (f) { 3525 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3526 } 3527 PetscFunctionReturn(0); 3528 } 3529 3530 #undef __FUNCT__ 3531 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3532 /*@C 3533 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3534 (the default sequential PETSc format). 3535 3536 Collective on MPI_Comm 3537 3538 Input Parameters: 3539 + A - the matrix 3540 . i - the indices into j for the start of each local row (starts with zero) 3541 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3542 - v - optional values in the matrix 3543 3544 Level: developer 3545 3546 .keywords: matrix, aij, compressed row, sparse 3547 3548 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3549 @*/ 3550 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3551 { 3552 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3553 3554 PetscFunctionBegin; 3555 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3556 if (f) { 3557 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3558 } 3559 PetscFunctionReturn(0); 3560 } 3561 3562 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3565 /*@ 3566 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3567 (upper triangular entries in CSR format) provided by the user. 3568 3569 Collective on MPI_Comm 3570 3571 Input Parameters: 3572 + comm - must be an MPI communicator of size 1 3573 . bs - size of block 3574 . m - number of rows 3575 . n - number of columns 3576 . i - row indices 3577 . j - column indices 3578 - a - matrix values 3579 3580 Output Parameter: 3581 . mat - the matrix 3582 3583 Level: intermediate 3584 3585 Notes: 3586 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3587 once the matrix is destroyed 3588 3589 You cannot set new nonzero locations into this matrix, that will generate an error. 3590 3591 The i and j indices are 0 based 3592 3593 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3594 3595 @*/ 3596 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3597 { 3598 PetscErrorCode ierr; 3599 PetscInt ii; 3600 Mat_SeqBAIJ *baij; 3601 3602 PetscFunctionBegin; 3603 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3604 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3605 3606 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3607 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3608 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3609 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3610 baij = (Mat_SeqBAIJ*)(*mat)->data; 3611 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3612 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3613 3614 baij->i = i; 3615 baij->j = j; 3616 baij->a = a; 3617 baij->singlemalloc = PETSC_FALSE; 3618 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3619 baij->free_a = PETSC_FALSE; 3620 baij->free_ij = PETSC_FALSE; 3621 3622 for (ii=0; ii<m; ii++) { 3623 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3624 #if defined(PETSC_USE_DEBUG) 3625 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3626 #endif 3627 } 3628 #if defined(PETSC_USE_DEBUG) 3629 for (ii=0; ii<baij->i[m]; ii++) { 3630 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3631 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3632 } 3633 #endif 3634 3635 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3636 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3637 PetscFunctionReturn(0); 3638 } 3639