1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 default: 129 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 PetscScalar *x,x1,s1; 142 const PetscScalar *b; 143 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 144 PetscErrorCode ierr; 145 PetscInt m = a->mbs,i,i2,nz,idx; 146 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 147 148 PetscFunctionBegin; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 its = its*lits; 151 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 152 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 153 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 154 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 155 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 156 157 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 158 159 diag = a->diag; 160 idiag = a->idiag; 161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 162 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 163 164 if (flag & SOR_ZERO_INITIAL_GUESS) { 165 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 166 x[0] = b[0]*idiag[0]; 167 i2 = 1; 168 idiag += 1; 169 for (i=1; i<m; i++) { 170 v = aa + ai[i]; 171 vi = aj + ai[i]; 172 nz = diag[i] - ai[i]; 173 s1 = b[i2]; 174 while (nz--) { 175 idx = (*vi++); 176 x1 = x[idx]; 177 s1 -= v[0]*x1; 178 v += 1; 179 } 180 x[i2] = idiag[0]*s1; 181 idiag += 1; 182 i2 += 1; 183 } 184 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 185 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 186 } 187 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 188 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 189 i2 = 0; 190 mdiag = a->idiag+a->mbs; 191 for (i=0; i<m; i++) { 192 x1 = x[i2]; 193 x[i2] = mdiag[0]*x1; 194 mdiag += 1; 195 i2 += 1; 196 } 197 ierr = PetscLogFlops(m);CHKERRQ(ierr); 198 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 199 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 200 } 201 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 202 idiag = a->idiag+a->mbs - 1; 203 i2 = m - 1; 204 x1 = x[i2]; 205 x[i2] = idiag[0]*x1; 206 idiag -= 1; 207 i2 -= 1; 208 for (i=m-2; i>=0; i--) { 209 v = aa + (diag[i]+1); 210 vi = aj + diag[i] + 1; 211 nz = ai[i+1] - diag[i] - 1; 212 s1 = x[i2]; 213 while (nz--) { 214 idx = (*vi++); 215 x1 = x[idx]; 216 s1 -= v[0]*x1; 217 v += 1; 218 } 219 x[i2] = idiag[0]*s1; 220 idiag -= 1; 221 i2 -= 1; 222 } 223 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 224 } 225 } else { 226 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 227 } 228 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 229 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 236 { 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238 PetscScalar *x,x1,x2,s1,s2; 239 const PetscScalar *b; 240 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 241 PetscErrorCode ierr; 242 PetscInt m = a->mbs,i,i2,nz,idx; 243 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 244 245 PetscFunctionBegin; 246 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 247 its = its*lits; 248 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 249 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 250 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 251 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 252 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 253 254 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 255 256 diag = a->diag; 257 idiag = a->idiag; 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 260 261 if (flag & SOR_ZERO_INITIAL_GUESS) { 262 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 263 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 264 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 265 i2 = 2; 266 idiag += 4; 267 for (i=1; i<m; i++) { 268 v = aa + 4*ai[i]; 269 vi = aj + ai[i]; 270 nz = diag[i] - ai[i]; 271 s1 = b[i2]; s2 = b[i2+1]; 272 while (nz--) { 273 idx = 2*(*vi++); 274 x1 = x[idx]; x2 = x[1+idx]; 275 s1 -= v[0]*x1 + v[2]*x2; 276 s2 -= v[1]*x1 + v[3]*x2; 277 v += 4; 278 } 279 x[i2] = idiag[0]*s1 + idiag[2]*s2; 280 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 281 idiag += 4; 282 i2 += 2; 283 } 284 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 285 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 286 } 287 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 288 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 289 i2 = 0; 290 mdiag = a->idiag+4*a->mbs; 291 for (i=0; i<m; i++) { 292 x1 = x[i2]; x2 = x[i2+1]; 293 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 294 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 295 mdiag += 4; 296 i2 += 2; 297 } 298 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 299 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 300 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 301 } 302 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 303 idiag = a->idiag+4*a->mbs - 4; 304 i2 = 2*m - 2; 305 x1 = x[i2]; x2 = x[i2+1]; 306 x[i2] = idiag[0]*x1 + idiag[2]*x2; 307 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 308 idiag -= 4; 309 i2 -= 2; 310 for (i=m-2; i>=0; i--) { 311 v = aa + 4*(diag[i]+1); 312 vi = aj + diag[i] + 1; 313 nz = ai[i+1] - diag[i] - 1; 314 s1 = x[i2]; s2 = x[i2+1]; 315 while (nz--) { 316 idx = 2*(*vi++); 317 x1 = x[idx]; x2 = x[1+idx]; 318 s1 -= v[0]*x1 + v[2]*x2; 319 s2 -= v[1]*x1 + v[3]*x2; 320 v += 4; 321 } 322 x[i2] = idiag[0]*s1 + idiag[2]*s2; 323 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 324 idiag -= 4; 325 i2 -= 2; 326 } 327 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 328 } 329 } else { 330 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 331 } 332 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 333 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 340 { 341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 342 PetscScalar *x,x1,x2,x3,s1,s2,s3; 343 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 344 const PetscScalar *b; 345 PetscErrorCode ierr; 346 PetscInt m = a->mbs,i,i2,nz,idx; 347 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 348 349 PetscFunctionBegin; 350 its = its*lits; 351 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 352 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 353 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 354 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 355 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 356 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 357 358 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 359 360 diag = a->diag; 361 idiag = a->idiag; 362 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 363 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 364 365 if (flag & SOR_ZERO_INITIAL_GUESS) { 366 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 367 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 368 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 369 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 370 i2 = 3; 371 idiag += 9; 372 for (i=1; i<m; i++) { 373 v = aa + 9*ai[i]; 374 vi = aj + ai[i]; 375 nz = diag[i] - ai[i]; 376 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 377 while (nz--) { 378 idx = 3*(*vi++); 379 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 380 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 381 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 382 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 383 v += 9; 384 } 385 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 386 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 387 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 388 idiag += 9; 389 i2 += 3; 390 } 391 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 392 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 393 } 394 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 395 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 396 i2 = 0; 397 mdiag = a->idiag+9*a->mbs; 398 for (i=0; i<m; i++) { 399 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 400 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 401 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 402 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 403 mdiag += 9; 404 i2 += 3; 405 } 406 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 407 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 408 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 409 } 410 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 411 idiag = a->idiag+9*a->mbs - 9; 412 i2 = 3*m - 3; 413 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 414 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 415 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 416 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 417 idiag -= 9; 418 i2 -= 3; 419 for (i=m-2; i>=0; i--) { 420 v = aa + 9*(diag[i]+1); 421 vi = aj + diag[i] + 1; 422 nz = ai[i+1] - diag[i] - 1; 423 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 424 while (nz--) { 425 idx = 3*(*vi++); 426 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 427 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 428 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 429 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 430 v += 9; 431 } 432 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 433 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 434 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 435 idiag -= 9; 436 i2 -= 3; 437 } 438 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 439 } 440 } else { 441 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 442 } 443 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 451 { 452 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 453 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 454 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 455 const PetscScalar *b; 456 PetscErrorCode ierr; 457 PetscInt m = a->mbs,i,i2,nz,idx; 458 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 459 460 PetscFunctionBegin; 461 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 462 its = its*lits; 463 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 464 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 465 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 466 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 467 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 468 469 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 470 471 diag = a->diag; 472 idiag = a->idiag; 473 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 474 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 475 476 if (flag & SOR_ZERO_INITIAL_GUESS) { 477 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 478 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 479 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 480 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 481 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 482 i2 = 4; 483 idiag += 16; 484 for (i=1; i<m; i++) { 485 v = aa + 16*ai[i]; 486 vi = aj + ai[i]; 487 nz = diag[i] - ai[i]; 488 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 489 while (nz--) { 490 idx = 4*(*vi++); 491 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 492 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 493 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 494 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 495 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 496 v += 16; 497 } 498 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 499 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 500 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 501 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 502 idiag += 16; 503 i2 += 4; 504 } 505 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 507 } 508 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510 i2 = 0; 511 mdiag = a->idiag+16*a->mbs; 512 for (i=0; i<m; i++) { 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 515 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 516 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 517 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 518 mdiag += 16; 519 i2 += 4; 520 } 521 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 522 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 523 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 524 } 525 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 526 idiag = a->idiag+16*a->mbs - 16; 527 i2 = 4*m - 4; 528 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 529 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 530 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 531 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 532 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 533 idiag -= 16; 534 i2 -= 4; 535 for (i=m-2; i>=0; i--) { 536 v = aa + 16*(diag[i]+1); 537 vi = aj + diag[i] + 1; 538 nz = ai[i+1] - diag[i] - 1; 539 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 540 while (nz--) { 541 idx = 4*(*vi++); 542 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 543 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 544 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 545 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 546 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 547 v += 16; 548 } 549 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 550 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 551 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 552 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 553 idiag -= 16; 554 i2 -= 4; 555 } 556 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 569 { 570 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 571 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 572 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 573 const PetscScalar *b; 574 PetscErrorCode ierr; 575 PetscInt m = a->mbs,i,i2,nz,idx; 576 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 577 578 PetscFunctionBegin; 579 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 580 its = its*lits; 581 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 582 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 583 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 584 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 585 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 586 587 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 588 589 diag = a->diag; 590 idiag = a->idiag; 591 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 592 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 593 594 if (flag & SOR_ZERO_INITIAL_GUESS) { 595 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 596 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 597 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 598 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 599 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 600 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 601 i2 = 5; 602 idiag += 25; 603 for (i=1; i<m; i++) { 604 v = aa + 25*ai[i]; 605 vi = aj + ai[i]; 606 nz = diag[i] - ai[i]; 607 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 608 while (nz--) { 609 idx = 5*(*vi++); 610 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 611 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 612 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 613 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 614 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 615 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 616 v += 25; 617 } 618 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 619 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 620 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 621 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 622 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 623 idiag += 25; 624 i2 += 5; 625 } 626 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 627 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 628 } 629 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 630 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 631 i2 = 0; 632 mdiag = a->idiag+25*a->mbs; 633 for (i=0; i<m; i++) { 634 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 635 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 636 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 637 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 638 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 639 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 640 mdiag += 25; 641 i2 += 5; 642 } 643 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 644 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 645 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 646 } 647 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 648 idiag = a->idiag+25*a->mbs - 25; 649 i2 = 5*m - 5; 650 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 651 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 652 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 653 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 654 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 655 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 656 idiag -= 25; 657 i2 -= 5; 658 for (i=m-2; i>=0; i--) { 659 v = aa + 25*(diag[i]+1); 660 vi = aj + diag[i] + 1; 661 nz = ai[i+1] - diag[i] - 1; 662 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 663 while (nz--) { 664 idx = 5*(*vi++); 665 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 666 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 667 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 668 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 669 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 670 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 671 v += 25; 672 } 673 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 674 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 675 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 676 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 677 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 678 idiag -= 25; 679 i2 -= 5; 680 } 681 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 682 } 683 } else { 684 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 685 } 686 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 694 { 695 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 696 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 697 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 698 const PetscScalar *b; 699 PetscErrorCode ierr; 700 PetscInt m = a->mbs,i,i2,nz,idx; 701 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 702 703 PetscFunctionBegin; 704 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 705 its = its*lits; 706 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 707 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 708 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 709 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 710 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 711 712 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 713 714 diag = a->diag; 715 idiag = a->idiag; 716 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 717 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 718 719 if (flag & SOR_ZERO_INITIAL_GUESS) { 720 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 721 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 722 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 723 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 724 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 725 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 726 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 727 i2 = 6; 728 idiag += 36; 729 for (i=1; i<m; i++) { 730 v = aa + 36*ai[i]; 731 vi = aj + ai[i]; 732 nz = diag[i] - ai[i]; 733 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 734 while (nz--) { 735 idx = 6*(*vi++); 736 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 737 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 738 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 739 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 740 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 741 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 742 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 743 v += 36; 744 } 745 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 746 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 747 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 748 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 749 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 750 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 751 idiag += 36; 752 i2 += 6; 753 } 754 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 755 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 756 } 757 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 758 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 759 i2 = 0; 760 mdiag = a->idiag+36*a->mbs; 761 for (i=0; i<m; i++) { 762 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 763 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 764 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 765 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 766 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 767 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 768 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 769 mdiag += 36; 770 i2 += 6; 771 } 772 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 773 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 774 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 775 } 776 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 777 idiag = a->idiag+36*a->mbs - 36; 778 i2 = 6*m - 6; 779 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 780 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 781 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 782 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 783 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 784 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 785 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 786 idiag -= 36; 787 i2 -= 6; 788 for (i=m-2; i>=0; i--) { 789 v = aa + 36*(diag[i]+1); 790 vi = aj + diag[i] + 1; 791 nz = ai[i+1] - diag[i] - 1; 792 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 793 while (nz--) { 794 idx = 6*(*vi++); 795 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 796 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 797 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 798 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 799 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 800 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 801 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 802 v += 36; 803 } 804 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 805 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 806 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 807 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 808 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 809 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 810 idiag -= 36; 811 i2 -= 6; 812 } 813 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 814 } 815 } else { 816 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 817 } 818 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 819 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 826 { 827 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 828 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 829 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 830 const PetscScalar *b; 831 PetscErrorCode ierr; 832 PetscInt m = a->mbs,i,i2,nz,idx; 833 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 834 835 PetscFunctionBegin; 836 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 837 its = its*lits; 838 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 839 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 840 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 841 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 842 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 843 844 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 845 846 diag = a->diag; 847 idiag = a->idiag; 848 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 849 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 850 851 if (flag & SOR_ZERO_INITIAL_GUESS) { 852 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 853 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 854 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 855 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 856 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 857 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 858 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 859 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 860 i2 = 7; 861 idiag += 49; 862 for (i=1; i<m; i++) { 863 v = aa + 49*ai[i]; 864 vi = aj + ai[i]; 865 nz = diag[i] - ai[i]; 866 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 867 while (nz--) { 868 idx = 7*(*vi++); 869 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 870 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 871 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 872 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 873 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 874 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 875 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 876 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 877 v += 49; 878 } 879 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 880 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 881 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 882 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 883 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 884 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 885 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 886 idiag += 49; 887 i2 += 7; 888 } 889 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 890 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 891 } 892 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 893 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 894 i2 = 0; 895 mdiag = a->idiag+49*a->mbs; 896 for (i=0; i<m; i++) { 897 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 898 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 899 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 900 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 901 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 902 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 903 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 904 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 905 mdiag += 36; 906 i2 += 6; 907 } 908 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 909 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 910 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 911 } 912 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 913 idiag = a->idiag+49*a->mbs - 49; 914 i2 = 7*m - 7; 915 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 916 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 917 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 918 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 919 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 920 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 921 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 922 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 923 idiag -= 49; 924 i2 -= 7; 925 for (i=m-2; i>=0; i--) { 926 v = aa + 49*(diag[i]+1); 927 vi = aj + diag[i] + 1; 928 nz = ai[i+1] - diag[i] - 1; 929 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 930 while (nz--) { 931 idx = 7*(*vi++); 932 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 933 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 934 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 935 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 936 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 937 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 938 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 939 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 940 v += 49; 941 } 942 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 943 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 944 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 945 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 946 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 947 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 948 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 949 idiag -= 49; 950 i2 -= 7; 951 } 952 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 953 } 954 } else { 955 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 956 } 957 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 958 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 /* 963 Special version for direct calls from Fortran (Used in PETSc-fun3d) 964 */ 965 #if defined(PETSC_HAVE_FORTRAN_CAPS) 966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 968 #define matsetvaluesblocked4_ matsetvaluesblocked4 969 #endif 970 971 EXTERN_C_BEGIN 972 #undef __FUNCT__ 973 #define __FUNCT__ "matsetvaluesblocked4_" 974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 975 { 976 Mat A = *AA; 977 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 978 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 979 PetscInt *ai=a->i,*ailen=a->ilen; 980 PetscInt *aj=a->j,stepval,lastcol = -1; 981 const PetscScalar *value = v; 982 MatScalar *ap,*aa = a->a,*bap; 983 984 PetscFunctionBegin; 985 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 986 stepval = (n-1)*4; 987 for (k=0; k<m; k++) { /* loop over added rows */ 988 row = im[k]; 989 rp = aj + ai[row]; 990 ap = aa + 16*ai[row]; 991 nrow = ailen[row]; 992 low = 0; 993 high = nrow; 994 for (l=0; l<n; l++) { /* loop over added columns */ 995 col = in[l]; 996 if (col <= lastcol) low = 0; else high = nrow; 997 lastcol = col; 998 value = v + k*(stepval+4 + l)*4; 999 while (high-low > 7) { 1000 t = (low+high)/2; 1001 if (rp[t] > col) high = t; 1002 else low = t; 1003 } 1004 for (i=low; i<high; i++) { 1005 if (rp[i] > col) break; 1006 if (rp[i] == col) { 1007 bap = ap + 16*i; 1008 for (ii=0; ii<4; ii++,value+=stepval) { 1009 for (jj=ii; jj<16; jj+=4) { 1010 bap[jj] += *value++; 1011 } 1012 } 1013 goto noinsert2; 1014 } 1015 } 1016 N = nrow++ - 1; 1017 high++; /* added new column index thus must search to one higher than before */ 1018 /* shift up all the later entries in this row */ 1019 for (ii=N; ii>=i; ii--) { 1020 rp[ii+1] = rp[ii]; 1021 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1022 } 1023 if (N >= i) { 1024 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1025 } 1026 rp[i] = col; 1027 bap = ap + 16*i; 1028 for (ii=0; ii<4; ii++,value+=stepval) { 1029 for (jj=ii; jj<16; jj+=4) { 1030 bap[jj] = *value++; 1031 } 1032 } 1033 noinsert2:; 1034 low = i; 1035 } 1036 ailen[row] = nrow; 1037 } 1038 PetscFunctionReturnVoid(); 1039 } 1040 EXTERN_C_END 1041 1042 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1043 #define matsetvalues4_ MATSETVALUES4 1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1045 #define matsetvalues4_ matsetvalues4 1046 #endif 1047 1048 EXTERN_C_BEGIN 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatSetValues4_" 1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1052 { 1053 Mat A = *AA; 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1056 PetscInt *ai=a->i,*ailen=a->ilen; 1057 PetscInt *aj=a->j,brow,bcol; 1058 PetscInt ridx,cidx,lastcol = -1; 1059 MatScalar *ap,value,*aa=a->a,*bap; 1060 1061 PetscFunctionBegin; 1062 for (k=0; k<m; k++) { /* loop over added rows */ 1063 row = im[k]; brow = row/4; 1064 rp = aj + ai[brow]; 1065 ap = aa + 16*ai[brow]; 1066 nrow = ailen[brow]; 1067 low = 0; 1068 high = nrow; 1069 for (l=0; l<n; l++) { /* loop over added columns */ 1070 col = in[l]; bcol = col/4; 1071 ridx = row % 4; cidx = col % 4; 1072 value = v[l + k*n]; 1073 if (col <= lastcol) low = 0; else high = nrow; 1074 lastcol = col; 1075 while (high-low > 7) { 1076 t = (low+high)/2; 1077 if (rp[t] > bcol) high = t; 1078 else low = t; 1079 } 1080 for (i=low; i<high; i++) { 1081 if (rp[i] > bcol) break; 1082 if (rp[i] == bcol) { 1083 bap = ap + 16*i + 4*cidx + ridx; 1084 *bap += value; 1085 goto noinsert1; 1086 } 1087 } 1088 N = nrow++ - 1; 1089 high++; /* added new column thus must search to one higher than before */ 1090 /* shift up all the later entries in this row */ 1091 for (ii=N; ii>=i; ii--) { 1092 rp[ii+1] = rp[ii]; 1093 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1094 } 1095 if (N>=i) { 1096 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1097 } 1098 rp[i] = bcol; 1099 ap[16*i + 4*cidx + ridx] = value; 1100 noinsert1:; 1101 low = i; 1102 } 1103 ailen[brow] = nrow; 1104 } 1105 PetscFunctionReturnVoid(); 1106 } 1107 EXTERN_C_END 1108 1109 /* 1110 Checks for missing diagonals 1111 */ 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1117 PetscErrorCode ierr; 1118 PetscInt *diag,*jj = a->j,i; 1119 1120 PetscFunctionBegin; 1121 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1122 *missing = PETSC_FALSE; 1123 if (A->rmap->n > 0 && !jj) { 1124 *missing = PETSC_TRUE; 1125 if (d) *d = 0; 1126 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1127 } else { 1128 diag = a->diag; 1129 for (i=0; i<a->mbs; i++) { 1130 if (jj[diag[i]] != i) { 1131 *missing = PETSC_TRUE; 1132 if (d) *d = i; 1133 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1134 } 1135 } 1136 } 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1143 { 1144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1145 PetscErrorCode ierr; 1146 PetscInt i,j,m = a->mbs; 1147 1148 PetscFunctionBegin; 1149 if (!a->diag) { 1150 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1151 } 1152 for (i=0; i<m; i++) { 1153 a->diag[i] = a->i[i+1]; 1154 for (j=a->i[i]; j<a->i[i+1]; j++) { 1155 if (a->j[j] == i) { 1156 a->diag[i] = j; 1157 break; 1158 } 1159 } 1160 } 1161 PetscFunctionReturn(0); 1162 } 1163 1164 1165 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1169 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1170 { 1171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1172 PetscErrorCode ierr; 1173 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1174 PetscInt *tia, *tja; 1175 1176 PetscFunctionBegin; 1177 *nn = n; 1178 if (!ia) PetscFunctionReturn(0); 1179 if (symmetric) { 1180 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1181 } else { 1182 tia = a->i; tja = a->j; 1183 } 1184 1185 if (!blockcompressed && bs > 1) { 1186 (*nn) *= bs; 1187 nbs = bs; 1188 /* malloc & create the natural set of indices */ 1189 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1190 if (n) { 1191 (*ia)[0] = 0; 1192 for (j=1; j<bs; j++) { 1193 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1194 } 1195 } 1196 1197 for (i=1; i<n; i++) { 1198 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1199 for (j=1; j<bs; j++) { 1200 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1201 } 1202 } 1203 if (n) { 1204 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1205 } 1206 1207 if (ja) { 1208 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1209 cnt = 0; 1210 for (i=0; i<n; i++) { 1211 for (j=0; j<bs; j++) { 1212 for (k=tia[i]; k<tia[i+1]; k++) { 1213 for (l=0; l<bs; l++) { 1214 (*ja)[cnt++] = bs*tja[k] + l; 1215 } 1216 } 1217 } 1218 } 1219 } 1220 1221 n *= bs; 1222 nz *= bs*bs; 1223 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1224 ierr = PetscFree(tia);CHKERRQ(ierr); 1225 ierr = PetscFree(tja);CHKERRQ(ierr); 1226 } 1227 } else { 1228 *ia = tia; 1229 if (ja) *ja = tja; 1230 } 1231 if (oshift == 1) { 1232 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1233 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1234 } 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1240 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1241 { 1242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1243 PetscErrorCode ierr; 1244 PetscInt i,n = a->mbs,nz = a->i[n]; 1245 1246 PetscFunctionBegin; 1247 if (!ia) PetscFunctionReturn(0); 1248 if (!blockcompressed && A->rmap->bs > 1) { 1249 ierr = PetscFree(*ia);CHKERRQ(ierr); 1250 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1251 } else if (symmetric) { 1252 ierr = PetscFree(*ia);CHKERRQ(ierr); 1253 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1254 } else if (oshift == 1) { /* blockcompressed */ 1255 for (i=0; i<n+1; i++) a->i[i]--; 1256 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1257 } 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1263 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1264 { 1265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1266 PetscErrorCode ierr; 1267 1268 PetscFunctionBegin; 1269 #if defined(PETSC_USE_LOG) 1270 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1271 #endif 1272 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1273 if (a->row) { 1274 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1275 } 1276 if (a->col) { 1277 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1278 } 1279 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1280 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1281 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1282 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1283 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1284 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1285 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1286 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1287 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1288 1289 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1290 ierr = PetscFree(a);CHKERRQ(ierr); 1291 1292 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1306 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1307 { 1308 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 switch (op) { 1313 case MAT_ROW_ORIENTED: 1314 a->roworiented = flg; 1315 break; 1316 case MAT_KEEP_NONZERO_PATTERN: 1317 a->keepnonzeropattern = flg; 1318 break; 1319 case MAT_NEW_NONZERO_LOCATIONS: 1320 a->nonew = (flg ? 0 : 1); 1321 break; 1322 case MAT_NEW_NONZERO_LOCATION_ERR: 1323 a->nonew = (flg ? -1 : 0); 1324 break; 1325 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1326 a->nonew = (flg ? -2 : 0); 1327 break; 1328 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1329 a->nounused = (flg ? -1 : 0); 1330 break; 1331 case MAT_NEW_DIAGONALS: 1332 case MAT_IGNORE_OFF_PROC_ENTRIES: 1333 case MAT_USE_HASH_TABLE: 1334 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1335 break; 1336 case MAT_SYMMETRIC: 1337 case MAT_STRUCTURALLY_SYMMETRIC: 1338 case MAT_HERMITIAN: 1339 case MAT_SYMMETRY_ETERNAL: 1340 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1341 break; 1342 default: 1343 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1344 } 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1350 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1351 { 1352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1353 PetscErrorCode ierr; 1354 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1355 MatScalar *aa,*aa_i; 1356 PetscScalar *v_i; 1357 1358 PetscFunctionBegin; 1359 bs = A->rmap->bs; 1360 ai = a->i; 1361 aj = a->j; 1362 aa = a->a; 1363 bs2 = a->bs2; 1364 1365 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1366 1367 bn = row/bs; /* Block number */ 1368 bp = row % bs; /* Block Position */ 1369 M = ai[bn+1] - ai[bn]; 1370 *nz = bs*M; 1371 1372 if (v) { 1373 *v = 0; 1374 if (*nz) { 1375 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1376 for (i=0; i<M; i++) { /* for each block in the block row */ 1377 v_i = *v + i*bs; 1378 aa_i = aa + bs2*(ai[bn] + i); 1379 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1380 } 1381 } 1382 } 1383 1384 if (idx) { 1385 *idx = 0; 1386 if (*nz) { 1387 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1388 for (i=0; i<M; i++) { /* for each block in the block row */ 1389 idx_i = *idx + i*bs; 1390 itmp = bs*aj[ai[bn] + i]; 1391 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1392 } 1393 } 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1400 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1401 { 1402 PetscErrorCode ierr; 1403 1404 PetscFunctionBegin; 1405 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1406 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1412 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1413 { 1414 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1415 Mat C; 1416 PetscErrorCode ierr; 1417 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1418 PetscInt *rows,*cols,bs2=a->bs2; 1419 MatScalar *array; 1420 1421 PetscFunctionBegin; 1422 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1423 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1424 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1425 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1426 1427 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1428 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1429 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1430 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1431 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1432 ierr = PetscFree(col);CHKERRQ(ierr); 1433 } else { 1434 C = *B; 1435 } 1436 1437 array = a->a; 1438 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1439 cols = rows + bs; 1440 for (i=0; i<mbs; i++) { 1441 cols[0] = i*bs; 1442 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1443 len = ai[i+1] - ai[i]; 1444 for (j=0; j<len; j++) { 1445 rows[0] = (*aj++)*bs; 1446 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1447 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1448 array += bs2; 1449 } 1450 } 1451 ierr = PetscFree(rows);CHKERRQ(ierr); 1452 1453 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1454 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 1456 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1457 *B = C; 1458 } else { 1459 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1466 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1467 { 1468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1469 PetscErrorCode ierr; 1470 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1471 int fd; 1472 PetscScalar *aa; 1473 FILE *file; 1474 1475 PetscFunctionBegin; 1476 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1477 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1478 col_lens[0] = MAT_FILE_COOKIE; 1479 1480 col_lens[1] = A->rmap->N; 1481 col_lens[2] = A->cmap->n; 1482 col_lens[3] = a->nz*bs2; 1483 1484 /* store lengths of each row and write (including header) to file */ 1485 count = 0; 1486 for (i=0; i<a->mbs; i++) { 1487 for (j=0; j<bs; j++) { 1488 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1489 } 1490 } 1491 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1492 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1493 1494 /* store column indices (zero start index) */ 1495 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1496 count = 0; 1497 for (i=0; i<a->mbs; i++) { 1498 for (j=0; j<bs; j++) { 1499 for (k=a->i[i]; k<a->i[i+1]; k++) { 1500 for (l=0; l<bs; l++) { 1501 jj[count++] = bs*a->j[k] + l; 1502 } 1503 } 1504 } 1505 } 1506 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1507 ierr = PetscFree(jj);CHKERRQ(ierr); 1508 1509 /* store nonzero values */ 1510 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1511 count = 0; 1512 for (i=0; i<a->mbs; i++) { 1513 for (j=0; j<bs; j++) { 1514 for (k=a->i[i]; k<a->i[i+1]; k++) { 1515 for (l=0; l<bs; l++) { 1516 aa[count++] = a->a[bs2*k + l*bs + j]; 1517 } 1518 } 1519 } 1520 } 1521 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1522 ierr = PetscFree(aa);CHKERRQ(ierr); 1523 1524 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1525 if (file) { 1526 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1527 } 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1533 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1534 { 1535 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1536 PetscErrorCode ierr; 1537 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1538 PetscViewerFormat format; 1539 1540 PetscFunctionBegin; 1541 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1542 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1543 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1544 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1545 Mat aij; 1546 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1547 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1548 ierr = MatDestroy(aij);CHKERRQ(ierr); 1549 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1550 PetscFunctionReturn(0); 1551 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1552 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1553 for (i=0; i<a->mbs; i++) { 1554 for (j=0; j<bs; j++) { 1555 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1556 for (k=a->i[i]; k<a->i[i+1]; k++) { 1557 for (l=0; l<bs; l++) { 1558 #if defined(PETSC_USE_COMPLEX) 1559 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1560 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1561 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1562 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1563 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1564 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1567 } 1568 #else 1569 if (a->a[bs2*k + l*bs + j] != 0.0) { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1571 } 1572 #endif 1573 } 1574 } 1575 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1576 } 1577 } 1578 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1579 } else { 1580 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1581 for (i=0; i<a->mbs; i++) { 1582 for (j=0; j<bs; j++) { 1583 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1584 for (k=a->i[i]; k<a->i[i+1]; k++) { 1585 for (l=0; l<bs; l++) { 1586 #if defined(PETSC_USE_COMPLEX) 1587 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1588 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1589 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1590 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1591 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1592 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1593 } else { 1594 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1595 } 1596 #else 1597 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1598 #endif 1599 } 1600 } 1601 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1602 } 1603 } 1604 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1605 } 1606 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1612 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1613 { 1614 Mat A = (Mat) Aa; 1615 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1616 PetscErrorCode ierr; 1617 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1618 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1619 MatScalar *aa; 1620 PetscViewer viewer; 1621 1622 PetscFunctionBegin; 1623 1624 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1625 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1626 1627 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1628 1629 /* loop over matrix elements drawing boxes */ 1630 color = PETSC_DRAW_BLUE; 1631 for (i=0,row=0; i<mbs; i++,row+=bs) { 1632 for (j=a->i[i]; j<a->i[i+1]; j++) { 1633 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1634 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1635 aa = a->a + j*bs2; 1636 for (k=0; k<bs; k++) { 1637 for (l=0; l<bs; l++) { 1638 if (PetscRealPart(*aa++) >= 0.) continue; 1639 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1640 } 1641 } 1642 } 1643 } 1644 color = PETSC_DRAW_CYAN; 1645 for (i=0,row=0; i<mbs; i++,row+=bs) { 1646 for (j=a->i[i]; j<a->i[i+1]; j++) { 1647 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1648 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1649 aa = a->a + j*bs2; 1650 for (k=0; k<bs; k++) { 1651 for (l=0; l<bs; l++) { 1652 if (PetscRealPart(*aa++) != 0.) continue; 1653 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1654 } 1655 } 1656 } 1657 } 1658 1659 color = PETSC_DRAW_RED; 1660 for (i=0,row=0; i<mbs; i++,row+=bs) { 1661 for (j=a->i[i]; j<a->i[i+1]; j++) { 1662 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1663 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1664 aa = a->a + j*bs2; 1665 for (k=0; k<bs; k++) { 1666 for (l=0; l<bs; l++) { 1667 if (PetscRealPart(*aa++) <= 0.) continue; 1668 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1669 } 1670 } 1671 } 1672 } 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1679 { 1680 PetscErrorCode ierr; 1681 PetscReal xl,yl,xr,yr,w,h; 1682 PetscDraw draw; 1683 PetscTruth isnull; 1684 1685 PetscFunctionBegin; 1686 1687 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1688 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1689 1690 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1691 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1692 xr += w; yr += h; xl = -w; yl = -h; 1693 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1694 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1695 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatView_SeqBAIJ" 1701 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1702 { 1703 PetscErrorCode ierr; 1704 PetscTruth iascii,isbinary,isdraw; 1705 1706 PetscFunctionBegin; 1707 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1708 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1709 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1710 if (iascii){ 1711 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1712 } else if (isbinary) { 1713 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1714 } else if (isdraw) { 1715 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1716 } else { 1717 Mat B; 1718 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1719 ierr = MatView(B,viewer);CHKERRQ(ierr); 1720 ierr = MatDestroy(B);CHKERRQ(ierr); 1721 } 1722 PetscFunctionReturn(0); 1723 } 1724 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1728 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1729 { 1730 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1731 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1732 PetscInt *ai = a->i,*ailen = a->ilen; 1733 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1734 MatScalar *ap,*aa = a->a; 1735 1736 PetscFunctionBegin; 1737 for (k=0; k<m; k++) { /* loop over rows */ 1738 row = im[k]; brow = row/bs; 1739 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1740 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1741 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1742 nrow = ailen[brow]; 1743 for (l=0; l<n; l++) { /* loop over columns */ 1744 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1745 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1746 col = in[l] ; 1747 bcol = col/bs; 1748 cidx = col%bs; 1749 ridx = row%bs; 1750 high = nrow; 1751 low = 0; /* assume unsorted */ 1752 while (high-low > 5) { 1753 t = (low+high)/2; 1754 if (rp[t] > bcol) high = t; 1755 else low = t; 1756 } 1757 for (i=low; i<high; i++) { 1758 if (rp[i] > bcol) break; 1759 if (rp[i] == bcol) { 1760 *v++ = ap[bs2*i+bs*cidx+ridx]; 1761 goto finished; 1762 } 1763 } 1764 *v++ = 0.0; 1765 finished:; 1766 } 1767 } 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #define CHUNKSIZE 10 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1774 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1775 { 1776 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1777 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1778 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1779 PetscErrorCode ierr; 1780 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1781 PetscTruth roworiented=a->roworiented; 1782 const PetscScalar *value = v; 1783 MatScalar *ap,*aa = a->a,*bap; 1784 1785 PetscFunctionBegin; 1786 if (roworiented) { 1787 stepval = (n-1)*bs; 1788 } else { 1789 stepval = (m-1)*bs; 1790 } 1791 for (k=0; k<m; k++) { /* loop over added rows */ 1792 row = im[k]; 1793 if (row < 0) continue; 1794 #if defined(PETSC_USE_DEBUG) 1795 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1796 #endif 1797 rp = aj + ai[row]; 1798 ap = aa + bs2*ai[row]; 1799 rmax = imax[row]; 1800 nrow = ailen[row]; 1801 low = 0; 1802 high = nrow; 1803 for (l=0; l<n; l++) { /* loop over added columns */ 1804 if (in[l] < 0) continue; 1805 #if defined(PETSC_USE_DEBUG) 1806 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1807 #endif 1808 col = in[l]; 1809 if (roworiented) { 1810 value = v + k*(stepval+bs)*bs + l*bs; 1811 } else { 1812 value = v + l*(stepval+bs)*bs + k*bs; 1813 } 1814 if (col <= lastcol) low = 0; else high = nrow; 1815 lastcol = col; 1816 while (high-low > 7) { 1817 t = (low+high)/2; 1818 if (rp[t] > col) high = t; 1819 else low = t; 1820 } 1821 for (i=low; i<high; i++) { 1822 if (rp[i] > col) break; 1823 if (rp[i] == col) { 1824 bap = ap + bs2*i; 1825 if (roworiented) { 1826 if (is == ADD_VALUES) { 1827 for (ii=0; ii<bs; ii++,value+=stepval) { 1828 for (jj=ii; jj<bs2; jj+=bs) { 1829 bap[jj] += *value++; 1830 } 1831 } 1832 } else { 1833 for (ii=0; ii<bs; ii++,value+=stepval) { 1834 for (jj=ii; jj<bs2; jj+=bs) { 1835 bap[jj] = *value++; 1836 } 1837 } 1838 } 1839 } else { 1840 if (is == ADD_VALUES) { 1841 for (ii=0; ii<bs; ii++,value+=stepval) { 1842 for (jj=0; jj<bs; jj++) { 1843 *bap++ += *value++; 1844 } 1845 } 1846 } else { 1847 for (ii=0; ii<bs; ii++,value+=stepval) { 1848 for (jj=0; jj<bs; jj++) { 1849 *bap++ = *value++; 1850 } 1851 } 1852 } 1853 } 1854 goto noinsert2; 1855 } 1856 } 1857 if (nonew == 1) goto noinsert2; 1858 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1859 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1860 N = nrow++ - 1; high++; 1861 /* shift up all the later entries in this row */ 1862 for (ii=N; ii>=i; ii--) { 1863 rp[ii+1] = rp[ii]; 1864 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1865 } 1866 if (N >= i) { 1867 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1868 } 1869 rp[i] = col; 1870 bap = ap + bs2*i; 1871 if (roworiented) { 1872 for (ii=0; ii<bs; ii++,value+=stepval) { 1873 for (jj=ii; jj<bs2; jj+=bs) { 1874 bap[jj] = *value++; 1875 } 1876 } 1877 } else { 1878 for (ii=0; ii<bs; ii++,value+=stepval) { 1879 for (jj=0; jj<bs; jj++) { 1880 *bap++ = *value++; 1881 } 1882 } 1883 } 1884 noinsert2:; 1885 low = i; 1886 } 1887 ailen[row] = nrow; 1888 } 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1895 { 1896 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1897 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1898 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1899 PetscErrorCode ierr; 1900 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1901 MatScalar *aa = a->a,*ap; 1902 PetscReal ratio=0.6; 1903 1904 PetscFunctionBegin; 1905 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1906 1907 if (m) rmax = ailen[0]; 1908 for (i=1; i<mbs; i++) { 1909 /* move each row back by the amount of empty slots (fshift) before it*/ 1910 fshift += imax[i-1] - ailen[i-1]; 1911 rmax = PetscMax(rmax,ailen[i]); 1912 if (fshift) { 1913 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1914 N = ailen[i]; 1915 for (j=0; j<N; j++) { 1916 ip[j-fshift] = ip[j]; 1917 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1918 } 1919 } 1920 ai[i] = ai[i-1] + ailen[i-1]; 1921 } 1922 if (mbs) { 1923 fshift += imax[mbs-1] - ailen[mbs-1]; 1924 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1925 } 1926 /* reset ilen and imax for each row */ 1927 for (i=0; i<mbs; i++) { 1928 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1929 } 1930 a->nz = ai[mbs]; 1931 1932 /* diagonals may have moved, so kill the diagonal pointers */ 1933 a->idiagvalid = PETSC_FALSE; 1934 if (fshift && a->diag) { 1935 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1936 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1937 a->diag = 0; 1938 } 1939 if (fshift && a->nounused == -1) { 1940 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1941 } 1942 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1943 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1944 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1945 a->reallocs = 0; 1946 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1947 1948 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1949 if (a->compressedrow.use){ 1950 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1951 } 1952 1953 A->same_nonzero = PETSC_TRUE; 1954 PetscFunctionReturn(0); 1955 } 1956 1957 /* 1958 This function returns an array of flags which indicate the locations of contiguous 1959 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1960 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1961 Assume: sizes should be long enough to hold all the values. 1962 */ 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1965 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1966 { 1967 PetscInt i,j,k,row; 1968 PetscTruth flg; 1969 1970 PetscFunctionBegin; 1971 for (i=0,j=0; i<n; j++) { 1972 row = idx[i]; 1973 if (row%bs!=0) { /* Not the begining of a block */ 1974 sizes[j] = 1; 1975 i++; 1976 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1977 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1978 i++; 1979 } else { /* Begining of the block, so check if the complete block exists */ 1980 flg = PETSC_TRUE; 1981 for (k=1; k<bs; k++) { 1982 if (row+k != idx[i+k]) { /* break in the block */ 1983 flg = PETSC_FALSE; 1984 break; 1985 } 1986 } 1987 if (flg) { /* No break in the bs */ 1988 sizes[j] = bs; 1989 i+= bs; 1990 } else { 1991 sizes[j] = 1; 1992 i++; 1993 } 1994 } 1995 } 1996 *bs_max = j; 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2002 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2003 { 2004 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2005 PetscErrorCode ierr; 2006 PetscInt i,j,k,count,*rows; 2007 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2008 PetscScalar zero = 0.0; 2009 MatScalar *aa; 2010 2011 PetscFunctionBegin; 2012 /* Make a copy of the IS and sort it */ 2013 /* allocate memory for rows,sizes */ 2014 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2015 sizes = rows + is_n; 2016 2017 /* copy IS values to rows, and sort them */ 2018 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2019 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2020 if (baij->keepnonzeropattern) { 2021 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2022 bs_max = is_n; 2023 A->same_nonzero = PETSC_TRUE; 2024 } else { 2025 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2026 A->same_nonzero = PETSC_FALSE; 2027 } 2028 2029 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2030 row = rows[j]; 2031 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2032 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2033 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2034 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2035 if (diag != 0.0) { 2036 if (baij->ilen[row/bs] > 0) { 2037 baij->ilen[row/bs] = 1; 2038 baij->j[baij->i[row/bs]] = row/bs; 2039 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2040 } 2041 /* Now insert all the diagonal values for this bs */ 2042 for (k=0; k<bs; k++) { 2043 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2044 } 2045 } else { /* (diag == 0.0) */ 2046 baij->ilen[row/bs] = 0; 2047 } /* end (diag == 0.0) */ 2048 } else { /* (sizes[i] != bs) */ 2049 #if defined (PETSC_USE_DEBUG) 2050 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2051 #endif 2052 for (k=0; k<count; k++) { 2053 aa[0] = zero; 2054 aa += bs; 2055 } 2056 if (diag != 0.0) { 2057 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2058 } 2059 } 2060 } 2061 2062 ierr = PetscFree(rows);CHKERRQ(ierr); 2063 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2069 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2070 { 2071 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2072 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2073 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2074 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2075 PetscErrorCode ierr; 2076 PetscInt ridx,cidx,bs2=a->bs2; 2077 PetscTruth roworiented=a->roworiented; 2078 MatScalar *ap,value,*aa=a->a,*bap; 2079 2080 PetscFunctionBegin; 2081 for (k=0; k<m; k++) { /* loop over added rows */ 2082 row = im[k]; 2083 brow = row/bs; 2084 if (row < 0) continue; 2085 #if defined(PETSC_USE_DEBUG) 2086 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2087 #endif 2088 rp = aj + ai[brow]; 2089 ap = aa + bs2*ai[brow]; 2090 rmax = imax[brow]; 2091 nrow = ailen[brow]; 2092 low = 0; 2093 high = nrow; 2094 for (l=0; l<n; l++) { /* loop over added columns */ 2095 if (in[l] < 0) continue; 2096 #if defined(PETSC_USE_DEBUG) 2097 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2098 #endif 2099 col = in[l]; bcol = col/bs; 2100 ridx = row % bs; cidx = col % bs; 2101 if (roworiented) { 2102 value = v[l + k*n]; 2103 } else { 2104 value = v[k + l*m]; 2105 } 2106 if (col <= lastcol) low = 0; else high = nrow; 2107 lastcol = col; 2108 while (high-low > 7) { 2109 t = (low+high)/2; 2110 if (rp[t] > bcol) high = t; 2111 else low = t; 2112 } 2113 for (i=low; i<high; i++) { 2114 if (rp[i] > bcol) break; 2115 if (rp[i] == bcol) { 2116 bap = ap + bs2*i + bs*cidx + ridx; 2117 if (is == ADD_VALUES) *bap += value; 2118 else *bap = value; 2119 goto noinsert1; 2120 } 2121 } 2122 if (nonew == 1) goto noinsert1; 2123 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2124 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2125 N = nrow++ - 1; high++; 2126 /* shift up all the later entries in this row */ 2127 for (ii=N; ii>=i; ii--) { 2128 rp[ii+1] = rp[ii]; 2129 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2130 } 2131 if (N>=i) { 2132 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2133 } 2134 rp[i] = bcol; 2135 ap[bs2*i + bs*cidx + ridx] = value; 2136 a->nz++; 2137 noinsert1:; 2138 low = i; 2139 } 2140 ailen[brow] = nrow; 2141 } 2142 A->same_nonzero = PETSC_FALSE; 2143 PetscFunctionReturn(0); 2144 } 2145 2146 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2147 2148 #undef __FUNCT__ 2149 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2150 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2151 { 2152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2153 Mat outA; 2154 PetscErrorCode ierr; 2155 PetscTruth row_identity,col_identity; 2156 2157 PetscFunctionBegin; 2158 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2159 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2160 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2161 if (!row_identity || !col_identity) { 2162 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2163 } 2164 2165 outA = inA; 2166 inA->factor = MAT_FACTOR_LU; 2167 2168 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2169 2170 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2171 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2172 a->row = row; 2173 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2174 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2175 a->col = col; 2176 2177 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2178 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2179 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2180 2181 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2182 if (!a->solve_work) { 2183 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2184 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2185 } 2186 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2187 2188 PetscFunctionReturn(0); 2189 } 2190 2191 EXTERN_C_BEGIN 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2194 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2195 { 2196 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2197 PetscInt i,nz,mbs; 2198 2199 PetscFunctionBegin; 2200 nz = baij->maxnz/baij->bs2; 2201 mbs = baij->mbs; 2202 for (i=0; i<nz; i++) { 2203 baij->j[i] = indices[i]; 2204 } 2205 baij->nz = nz; 2206 for (i=0; i<mbs; i++) { 2207 baij->ilen[i] = baij->imax[i]; 2208 } 2209 PetscFunctionReturn(0); 2210 } 2211 EXTERN_C_END 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2215 /*@ 2216 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2217 in the matrix. 2218 2219 Input Parameters: 2220 + mat - the SeqBAIJ matrix 2221 - indices - the column indices 2222 2223 Level: advanced 2224 2225 Notes: 2226 This can be called if you have precomputed the nonzero structure of the 2227 matrix and want to provide it to the matrix object to improve the performance 2228 of the MatSetValues() operation. 2229 2230 You MUST have set the correct numbers of nonzeros per row in the call to 2231 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2232 2233 MUST be called before any calls to MatSetValues(); 2234 2235 @*/ 2236 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2237 { 2238 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2239 2240 PetscFunctionBegin; 2241 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2242 PetscValidPointer(indices,2); 2243 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2244 if (f) { 2245 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2246 } else { 2247 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2248 } 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2254 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2255 { 2256 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2257 PetscErrorCode ierr; 2258 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2259 PetscReal atmp; 2260 PetscScalar *x,zero = 0.0; 2261 MatScalar *aa; 2262 PetscInt ncols,brow,krow,kcol; 2263 2264 PetscFunctionBegin; 2265 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2266 bs = A->rmap->bs; 2267 aa = a->a; 2268 ai = a->i; 2269 aj = a->j; 2270 mbs = a->mbs; 2271 2272 ierr = VecSet(v,zero);CHKERRQ(ierr); 2273 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2274 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2275 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2276 for (i=0; i<mbs; i++) { 2277 ncols = ai[1] - ai[0]; ai++; 2278 brow = bs*i; 2279 for (j=0; j<ncols; j++){ 2280 for (kcol=0; kcol<bs; kcol++){ 2281 for (krow=0; krow<bs; krow++){ 2282 atmp = PetscAbsScalar(*aa);aa++; 2283 row = brow + krow; /* row index */ 2284 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2285 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2286 } 2287 } 2288 aj++; 2289 } 2290 } 2291 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatCopy_SeqBAIJ" 2297 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2298 { 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 /* If the two matrices have the same copy implementation, use fast copy. */ 2303 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2304 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2305 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2306 2307 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2308 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2309 } 2310 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2311 } else { 2312 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2313 } 2314 PetscFunctionReturn(0); 2315 } 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2319 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2320 { 2321 PetscErrorCode ierr; 2322 2323 PetscFunctionBegin; 2324 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2330 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2331 { 2332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2333 PetscFunctionBegin; 2334 *array = a->a; 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2340 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2341 { 2342 PetscFunctionBegin; 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #include "petscblaslapack.h" 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2349 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2350 { 2351 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2352 PetscErrorCode ierr; 2353 PetscInt i,bs=Y->rmap->bs,j,bs2; 2354 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2355 2356 PetscFunctionBegin; 2357 if (str == SAME_NONZERO_PATTERN) { 2358 PetscScalar alpha = a; 2359 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2360 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2361 if (y->xtoy && y->XtoY != X) { 2362 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2363 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2364 } 2365 if (!y->xtoy) { /* get xtoy */ 2366 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2367 y->XtoY = X; 2368 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2369 } 2370 bs2 = bs*bs; 2371 for (i=0; i<x->nz; i++) { 2372 j = 0; 2373 while (j < bs2){ 2374 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2375 j++; 2376 } 2377 } 2378 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2379 } else { 2380 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2387 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2388 { 2389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2390 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2391 MatScalar *aa = a->a; 2392 2393 PetscFunctionBegin; 2394 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2395 PetscFunctionReturn(0); 2396 } 2397 2398 #undef __FUNCT__ 2399 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2400 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2401 { 2402 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2403 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2404 MatScalar *aa = a->a; 2405 2406 PetscFunctionBegin; 2407 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2415 /* 2416 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2417 */ 2418 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2419 { 2420 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2421 PetscErrorCode ierr; 2422 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2423 PetscInt nz = a->i[m],row,*jj,mr,col; 2424 2425 PetscFunctionBegin; 2426 *nn = n; 2427 if (!ia) PetscFunctionReturn(0); 2428 if (symmetric) { 2429 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2430 } else { 2431 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2432 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2433 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2434 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2435 jj = a->j; 2436 for (i=0; i<nz; i++) { 2437 collengths[jj[i]]++; 2438 } 2439 cia[0] = oshift; 2440 for (i=0; i<n; i++) { 2441 cia[i+1] = cia[i] + collengths[i]; 2442 } 2443 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2444 jj = a->j; 2445 for (row=0; row<m; row++) { 2446 mr = a->i[row+1] - a->i[row]; 2447 for (i=0; i<mr; i++) { 2448 col = *jj++; 2449 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2450 } 2451 } 2452 ierr = PetscFree(collengths);CHKERRQ(ierr); 2453 *ia = cia; *ja = cja; 2454 } 2455 PetscFunctionReturn(0); 2456 } 2457 2458 #undef __FUNCT__ 2459 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2460 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2461 { 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 if (!ia) PetscFunctionReturn(0); 2466 ierr = PetscFree(*ia);CHKERRQ(ierr); 2467 ierr = PetscFree(*ja);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 2472 /* -------------------------------------------------------------------*/ 2473 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2474 MatGetRow_SeqBAIJ, 2475 MatRestoreRow_SeqBAIJ, 2476 MatMult_SeqBAIJ_N, 2477 /* 4*/ MatMultAdd_SeqBAIJ_N, 2478 MatMultTranspose_SeqBAIJ, 2479 MatMultTransposeAdd_SeqBAIJ, 2480 0, 2481 0, 2482 0, 2483 /*10*/ 0, 2484 MatLUFactor_SeqBAIJ, 2485 0, 2486 0, 2487 MatTranspose_SeqBAIJ, 2488 /*15*/ MatGetInfo_SeqBAIJ, 2489 MatEqual_SeqBAIJ, 2490 MatGetDiagonal_SeqBAIJ, 2491 MatDiagonalScale_SeqBAIJ, 2492 MatNorm_SeqBAIJ, 2493 /*20*/ 0, 2494 MatAssemblyEnd_SeqBAIJ, 2495 MatSetOption_SeqBAIJ, 2496 MatZeroEntries_SeqBAIJ, 2497 /*24*/ MatZeroRows_SeqBAIJ, 2498 0, 2499 0, 2500 0, 2501 0, 2502 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2503 0, 2504 0, 2505 MatGetArray_SeqBAIJ, 2506 MatRestoreArray_SeqBAIJ, 2507 /*34*/ MatDuplicate_SeqBAIJ, 2508 0, 2509 0, 2510 MatILUFactor_SeqBAIJ, 2511 0, 2512 /*39*/ MatAXPY_SeqBAIJ, 2513 MatGetSubMatrices_SeqBAIJ, 2514 MatIncreaseOverlap_SeqBAIJ, 2515 MatGetValues_SeqBAIJ, 2516 MatCopy_SeqBAIJ, 2517 /*44*/ 0, 2518 MatScale_SeqBAIJ, 2519 0, 2520 0, 2521 MatILUDTFactor_SeqBAIJ, 2522 /*49*/ 0, 2523 MatGetRowIJ_SeqBAIJ, 2524 MatRestoreRowIJ_SeqBAIJ, 2525 MatGetColumnIJ_SeqBAIJ, 2526 MatRestoreColumnIJ_SeqBAIJ, 2527 /*54*/ MatFDColoringCreate_SeqAIJ, 2528 0, 2529 0, 2530 0, 2531 MatSetValuesBlocked_SeqBAIJ, 2532 /*59*/ MatGetSubMatrix_SeqBAIJ, 2533 MatDestroy_SeqBAIJ, 2534 MatView_SeqBAIJ, 2535 0, 2536 0, 2537 /*64*/ 0, 2538 0, 2539 0, 2540 0, 2541 0, 2542 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2543 0, 2544 MatConvert_Basic, 2545 0, 2546 0, 2547 /*74*/ 0, 2548 0, 2549 0, 2550 0, 2551 0, 2552 /*79*/ 0, 2553 0, 2554 0, 2555 0, 2556 MatLoad_SeqBAIJ, 2557 /*84*/ 0, 2558 0, 2559 0, 2560 0, 2561 0, 2562 /*89*/ 0, 2563 0, 2564 0, 2565 0, 2566 0, 2567 /*94*/ 0, 2568 0, 2569 0, 2570 0, 2571 0, 2572 /*99*/0, 2573 0, 2574 0, 2575 0, 2576 0, 2577 /*104*/0, 2578 MatRealPart_SeqBAIJ, 2579 MatImaginaryPart_SeqBAIJ, 2580 0, 2581 0, 2582 /*109*/0, 2583 0, 2584 0, 2585 0, 2586 MatMissingDiagonal_SeqBAIJ 2587 /*114*/ 2588 }; 2589 2590 EXTERN_C_BEGIN 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2593 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2594 { 2595 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2596 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2597 PetscErrorCode ierr; 2598 2599 PetscFunctionBegin; 2600 if (aij->nonew != 1) { 2601 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2602 } 2603 2604 /* allocate space for values if not already there */ 2605 if (!aij->saved_values) { 2606 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2607 } 2608 2609 /* copy values over */ 2610 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2611 PetscFunctionReturn(0); 2612 } 2613 EXTERN_C_END 2614 2615 EXTERN_C_BEGIN 2616 #undef __FUNCT__ 2617 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2618 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2619 { 2620 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2621 PetscErrorCode ierr; 2622 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2623 2624 PetscFunctionBegin; 2625 if (aij->nonew != 1) { 2626 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2627 } 2628 if (!aij->saved_values) { 2629 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2630 } 2631 2632 /* copy values over */ 2633 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2634 PetscFunctionReturn(0); 2635 } 2636 EXTERN_C_END 2637 2638 EXTERN_C_BEGIN 2639 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2640 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2641 EXTERN_C_END 2642 2643 EXTERN_C_BEGIN 2644 #undef __FUNCT__ 2645 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2646 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2647 { 2648 Mat_SeqBAIJ *b; 2649 PetscErrorCode ierr; 2650 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2651 PetscTruth flg,skipallocation = PETSC_FALSE; 2652 2653 PetscFunctionBegin; 2654 2655 if (nz == MAT_SKIP_ALLOCATION) { 2656 skipallocation = PETSC_TRUE; 2657 nz = 0; 2658 } 2659 2660 if (bs < 0) { 2661 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2662 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2663 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2664 bs = PetscAbs(bs); 2665 } 2666 if (nnz && newbs != bs) { 2667 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2668 } 2669 bs = newbs; 2670 2671 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2672 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2673 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2674 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2675 2676 B->preallocated = PETSC_TRUE; 2677 2678 mbs = B->rmap->n/bs; 2679 nbs = B->cmap->n/bs; 2680 bs2 = bs*bs; 2681 2682 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2683 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2684 } 2685 2686 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2687 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2688 if (nnz) { 2689 for (i=0; i<mbs; i++) { 2690 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2691 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2692 } 2693 } 2694 2695 b = (Mat_SeqBAIJ*)B->data; 2696 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2697 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2698 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2699 2700 if (!flg) { 2701 switch (bs) { 2702 case 1: 2703 B->ops->mult = MatMult_SeqBAIJ_1; 2704 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2705 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2706 break; 2707 case 2: 2708 B->ops->mult = MatMult_SeqBAIJ_2; 2709 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2710 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2711 break; 2712 case 3: 2713 B->ops->mult = MatMult_SeqBAIJ_3; 2714 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2715 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2716 break; 2717 case 4: 2718 B->ops->mult = MatMult_SeqBAIJ_4; 2719 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2720 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2721 break; 2722 case 5: 2723 B->ops->mult = MatMult_SeqBAIJ_5; 2724 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2725 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2726 break; 2727 case 6: 2728 B->ops->mult = MatMult_SeqBAIJ_6; 2729 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2730 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2731 break; 2732 case 7: 2733 B->ops->mult = MatMult_SeqBAIJ_7; 2734 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2735 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2736 break; 2737 default: 2738 B->ops->mult = MatMult_SeqBAIJ_N; 2739 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2740 break; 2741 } 2742 } 2743 B->rmap->bs = bs; 2744 b->mbs = mbs; 2745 b->nbs = nbs; 2746 if (!skipallocation) { 2747 if (!b->imax) { 2748 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2749 } 2750 /* b->ilen will count nonzeros in each block row so far. */ 2751 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2752 if (!nnz) { 2753 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2754 else if (nz <= 0) nz = 1; 2755 for (i=0; i<mbs; i++) b->imax[i] = nz; 2756 nz = nz*mbs; 2757 } else { 2758 nz = 0; 2759 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2760 } 2761 2762 /* allocate the matrix space */ 2763 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2764 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2765 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2766 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2767 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2768 b->singlemalloc = PETSC_TRUE; 2769 b->i[0] = 0; 2770 for (i=1; i<mbs+1; i++) { 2771 b->i[i] = b->i[i-1] + b->imax[i-1]; 2772 } 2773 b->free_a = PETSC_TRUE; 2774 b->free_ij = PETSC_TRUE; 2775 } else { 2776 b->free_a = PETSC_FALSE; 2777 b->free_ij = PETSC_FALSE; 2778 } 2779 2780 B->rmap->bs = bs; 2781 b->bs2 = bs2; 2782 b->mbs = mbs; 2783 b->nz = 0; 2784 b->maxnz = nz*bs2; 2785 B->info.nz_unneeded = (PetscReal)b->maxnz; 2786 PetscFunctionReturn(0); 2787 } 2788 EXTERN_C_END 2789 2790 EXTERN_C_BEGIN 2791 #undef __FUNCT__ 2792 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2793 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2794 { 2795 PetscInt i,m,nz,nz_max=0,*nnz; 2796 PetscScalar *values=0; 2797 PetscErrorCode ierr; 2798 2799 PetscFunctionBegin; 2800 2801 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2802 2803 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2804 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2805 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2806 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2807 m = B->rmap->n/bs; 2808 2809 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 2810 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 2811 for(i=0; i<m; i++) { 2812 nz = ii[i+1]- ii[i]; 2813 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 2814 nz_max = PetscMax(nz_max, nz); 2815 nnz[i] = nz; 2816 } 2817 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2818 ierr = PetscFree(nnz);CHKERRQ(ierr); 2819 2820 values = (PetscScalar*)V; 2821 if (!values) { 2822 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2823 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2824 } 2825 for (i=0; i<m; i++) { 2826 PetscInt ncols = ii[i+1] - ii[i]; 2827 const PetscInt *icols = jj + ii[i]; 2828 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2829 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2830 } 2831 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2832 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2834 2835 PetscFunctionReturn(0); 2836 } 2837 EXTERN_C_END 2838 2839 2840 EXTERN_C_BEGIN 2841 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2842 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 2843 EXTERN_C_END 2844 2845 /*MC 2846 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2847 block sparse compressed row format. 2848 2849 Options Database Keys: 2850 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2851 2852 Level: beginner 2853 2854 .seealso: MatCreateSeqBAIJ() 2855 M*/ 2856 2857 2858 EXTERN_C_BEGIN 2859 #undef __FUNCT__ 2860 #define __FUNCT__ "MatCreate_SeqBAIJ" 2861 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2862 { 2863 PetscErrorCode ierr; 2864 PetscMPIInt size; 2865 Mat_SeqBAIJ *b; 2866 2867 PetscFunctionBegin; 2868 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2869 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2870 2871 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2872 B->data = (void*)b; 2873 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2874 B->mapping = 0; 2875 b->row = 0; 2876 b->col = 0; 2877 b->icol = 0; 2878 b->reallocs = 0; 2879 b->saved_values = 0; 2880 2881 b->roworiented = PETSC_TRUE; 2882 b->nonew = 0; 2883 b->diag = 0; 2884 b->solve_work = 0; 2885 b->mult_work = 0; 2886 B->spptr = 0; 2887 B->info.nz_unneeded = (PetscReal)b->maxnz; 2888 b->keepnonzeropattern = PETSC_FALSE; 2889 b->xtoy = 0; 2890 b->XtoY = 0; 2891 b->compressedrow.use = PETSC_FALSE; 2892 b->compressedrow.nrows = 0; 2893 b->compressedrow.i = PETSC_NULL; 2894 b->compressedrow.rindex = PETSC_NULL; 2895 b->compressedrow.checked = PETSC_FALSE; 2896 B->same_nonzero = PETSC_FALSE; 2897 2898 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 2899 "MatGetFactorAvailable_seqbaij_petsc", 2900 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 2901 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 2902 "MatGetFactor_seqbaij_petsc", 2903 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 2904 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2905 "MatInvertBlockDiagonal_SeqBAIJ", 2906 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2907 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2908 "MatStoreValues_SeqBAIJ", 2909 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2910 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2911 "MatRetrieveValues_SeqBAIJ", 2912 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2913 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2914 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2915 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2916 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2917 "MatConvert_SeqBAIJ_SeqAIJ", 2918 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2919 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2920 "MatConvert_SeqBAIJ_SeqSBAIJ", 2921 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2922 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2923 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2924 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2925 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 2926 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 2927 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2928 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2929 PetscFunctionReturn(0); 2930 } 2931 EXTERN_C_END 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 2935 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues) 2936 { 2937 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2938 PetscErrorCode ierr; 2939 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2940 2941 PetscFunctionBegin; 2942 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2943 2944 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2945 for (i=0; i<mbs; i++) { 2946 c->imax[i] = a->imax[i]; 2947 c->ilen[i] = a->ilen[i]; 2948 } 2949 2950 /* allocate the matrix space */ 2951 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2952 c->singlemalloc = PETSC_TRUE; 2953 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2954 if (mbs > 0) { 2955 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2956 if (cpvalues == MAT_COPY_VALUES) { 2957 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2958 } else { 2959 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2960 } 2961 } 2962 c->roworiented = a->roworiented; 2963 c->nonew = a->nonew; 2964 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 2965 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 2966 c->bs2 = a->bs2; 2967 c->mbs = a->mbs; 2968 c->nbs = a->nbs; 2969 2970 if (a->diag) { 2971 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2972 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2973 for (i=0; i<mbs; i++) { 2974 c->diag[i] = a->diag[i]; 2975 } 2976 } else c->diag = 0; 2977 c->nz = a->nz; 2978 c->maxnz = a->maxnz; 2979 c->solve_work = 0; 2980 c->mult_work = 0; 2981 c->free_a = PETSC_TRUE; 2982 c->free_ij = PETSC_TRUE; 2983 C->preallocated = PETSC_TRUE; 2984 C->assembled = PETSC_TRUE; 2985 2986 c->compressedrow.use = a->compressedrow.use; 2987 c->compressedrow.nrows = a->compressedrow.nrows; 2988 c->compressedrow.checked = a->compressedrow.checked; 2989 if ( a->compressedrow.checked && a->compressedrow.use){ 2990 i = a->compressedrow.nrows; 2991 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2992 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2993 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2994 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2995 } else { 2996 c->compressedrow.use = PETSC_FALSE; 2997 c->compressedrow.i = PETSC_NULL; 2998 c->compressedrow.rindex = PETSC_NULL; 2999 } 3000 C->same_nonzero = A->same_nonzero; 3001 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3002 PetscFunctionReturn(0); 3003 } 3004 3005 #undef __FUNCT__ 3006 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3007 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3008 { 3009 PetscErrorCode ierr; 3010 3011 PetscFunctionBegin; 3012 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3013 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3014 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3015 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues); 3016 PetscFunctionReturn(0); 3017 } 3018 3019 #undef __FUNCT__ 3020 #define __FUNCT__ "MatLoad_SeqBAIJ" 3021 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3022 { 3023 Mat_SeqBAIJ *a; 3024 Mat B; 3025 PetscErrorCode ierr; 3026 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3027 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3028 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3029 PetscInt *masked,nmask,tmp,bs2,ishift; 3030 PetscMPIInt size; 3031 int fd; 3032 PetscScalar *aa; 3033 MPI_Comm comm = ((PetscObject)viewer)->comm; 3034 3035 PetscFunctionBegin; 3036 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3037 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3038 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3039 bs2 = bs*bs; 3040 3041 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3042 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3043 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3044 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3045 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3046 M = header[1]; N = header[2]; nz = header[3]; 3047 3048 if (header[3] < 0) { 3049 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3050 } 3051 3052 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3053 3054 /* 3055 This code adds extra rows to make sure the number of rows is 3056 divisible by the blocksize 3057 */ 3058 mbs = M/bs; 3059 extra_rows = bs - M + bs*(mbs); 3060 if (extra_rows == bs) extra_rows = 0; 3061 else mbs++; 3062 if (extra_rows) { 3063 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3064 } 3065 3066 /* read in row lengths */ 3067 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3068 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3069 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3070 3071 /* read in column indices */ 3072 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3073 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3074 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3075 3076 /* loop over row lengths determining block row lengths */ 3077 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3078 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3079 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3080 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3081 masked = mask + mbs; 3082 rowcount = 0; nzcount = 0; 3083 for (i=0; i<mbs; i++) { 3084 nmask = 0; 3085 for (j=0; j<bs; j++) { 3086 kmax = rowlengths[rowcount]; 3087 for (k=0; k<kmax; k++) { 3088 tmp = jj[nzcount++]/bs; 3089 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3090 } 3091 rowcount++; 3092 } 3093 browlengths[i] += nmask; 3094 /* zero out the mask elements we set */ 3095 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3096 } 3097 3098 /* create our matrix */ 3099 ierr = MatCreate(comm,&B); 3100 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3101 ierr = MatSetType(B,type);CHKERRQ(ierr); 3102 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3103 a = (Mat_SeqBAIJ*)B->data; 3104 3105 /* set matrix "i" values */ 3106 a->i[0] = 0; 3107 for (i=1; i<= mbs; i++) { 3108 a->i[i] = a->i[i-1] + browlengths[i-1]; 3109 a->ilen[i-1] = browlengths[i-1]; 3110 } 3111 a->nz = 0; 3112 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3113 3114 /* read in nonzero values */ 3115 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3116 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3117 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3118 3119 /* set "a" and "j" values into matrix */ 3120 nzcount = 0; jcount = 0; 3121 for (i=0; i<mbs; i++) { 3122 nzcountb = nzcount; 3123 nmask = 0; 3124 for (j=0; j<bs; j++) { 3125 kmax = rowlengths[i*bs+j]; 3126 for (k=0; k<kmax; k++) { 3127 tmp = jj[nzcount++]/bs; 3128 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3129 } 3130 } 3131 /* sort the masked values */ 3132 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3133 3134 /* set "j" values into matrix */ 3135 maskcount = 1; 3136 for (j=0; j<nmask; j++) { 3137 a->j[jcount++] = masked[j]; 3138 mask[masked[j]] = maskcount++; 3139 } 3140 /* set "a" values into matrix */ 3141 ishift = bs2*a->i[i]; 3142 for (j=0; j<bs; j++) { 3143 kmax = rowlengths[i*bs+j]; 3144 for (k=0; k<kmax; k++) { 3145 tmp = jj[nzcountb]/bs ; 3146 block = mask[tmp] - 1; 3147 point = jj[nzcountb] - bs*tmp; 3148 idx = ishift + bs2*block + j + bs*point; 3149 a->a[idx] = (MatScalar)aa[nzcountb++]; 3150 } 3151 } 3152 /* zero out the mask elements we set */ 3153 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3154 } 3155 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3156 3157 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3158 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3159 ierr = PetscFree(aa);CHKERRQ(ierr); 3160 ierr = PetscFree(jj);CHKERRQ(ierr); 3161 ierr = PetscFree(mask);CHKERRQ(ierr); 3162 3163 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3165 ierr = MatView_Private(B);CHKERRQ(ierr); 3166 3167 *A = B; 3168 PetscFunctionReturn(0); 3169 } 3170 3171 #undef __FUNCT__ 3172 #define __FUNCT__ "MatCreateSeqBAIJ" 3173 /*@C 3174 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3175 compressed row) format. For good matrix assembly performance the 3176 user should preallocate the matrix storage by setting the parameter nz 3177 (or the array nnz). By setting these parameters accurately, performance 3178 during matrix assembly can be increased by more than a factor of 50. 3179 3180 Collective on MPI_Comm 3181 3182 Input Parameters: 3183 + comm - MPI communicator, set to PETSC_COMM_SELF 3184 . bs - size of block 3185 . m - number of rows 3186 . n - number of columns 3187 . nz - number of nonzero blocks per block row (same for all rows) 3188 - nnz - array containing the number of nonzero blocks in the various block rows 3189 (possibly different for each block row) or PETSC_NULL 3190 3191 Output Parameter: 3192 . A - the matrix 3193 3194 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3195 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3196 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3197 3198 Options Database Keys: 3199 . -mat_no_unroll - uses code that does not unroll the loops in the 3200 block calculations (much slower) 3201 . -mat_block_size - size of the blocks to use 3202 3203 Level: intermediate 3204 3205 Notes: 3206 The number of rows and columns must be divisible by blocksize. 3207 3208 If the nnz parameter is given then the nz parameter is ignored 3209 3210 A nonzero block is any block that as 1 or more nonzeros in it 3211 3212 The block AIJ format is fully compatible with standard Fortran 77 3213 storage. That is, the stored row and column indices can begin at 3214 either one (as in Fortran) or zero. See the users' manual for details. 3215 3216 Specify the preallocated storage with either nz or nnz (not both). 3217 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3218 allocation. For additional details, see the users manual chapter on 3219 matrices. 3220 3221 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3222 @*/ 3223 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3224 { 3225 PetscErrorCode ierr; 3226 3227 PetscFunctionBegin; 3228 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3229 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3230 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3231 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3232 PetscFunctionReturn(0); 3233 } 3234 3235 #undef __FUNCT__ 3236 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3237 /*@C 3238 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3239 per row in the matrix. For good matrix assembly performance the 3240 user should preallocate the matrix storage by setting the parameter nz 3241 (or the array nnz). By setting these parameters accurately, performance 3242 during matrix assembly can be increased by more than a factor of 50. 3243 3244 Collective on MPI_Comm 3245 3246 Input Parameters: 3247 + A - the matrix 3248 . bs - size of block 3249 . nz - number of block nonzeros per block row (same for all rows) 3250 - nnz - array containing the number of block nonzeros in the various block rows 3251 (possibly different for each block row) or PETSC_NULL 3252 3253 Options Database Keys: 3254 . -mat_no_unroll - uses code that does not unroll the loops in the 3255 block calculations (much slower) 3256 . -mat_block_size - size of the blocks to use 3257 3258 Level: intermediate 3259 3260 Notes: 3261 If the nnz parameter is given then the nz parameter is ignored 3262 3263 You can call MatGetInfo() to get information on how effective the preallocation was; 3264 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3265 You can also run with the option -info and look for messages with the string 3266 malloc in them to see if additional memory allocation was needed. 3267 3268 The block AIJ format is fully compatible with standard Fortran 77 3269 storage. That is, the stored row and column indices can begin at 3270 either one (as in Fortran) or zero. See the users' manual for details. 3271 3272 Specify the preallocated storage with either nz or nnz (not both). 3273 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3274 allocation. For additional details, see the users manual chapter on 3275 matrices. 3276 3277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3278 @*/ 3279 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3280 { 3281 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3282 3283 PetscFunctionBegin; 3284 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3285 if (f) { 3286 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3287 } 3288 PetscFunctionReturn(0); 3289 } 3290 3291 #undef __FUNCT__ 3292 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3293 /*@C 3294 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3295 (the default sequential PETSc format). 3296 3297 Collective on MPI_Comm 3298 3299 Input Parameters: 3300 + A - the matrix 3301 . i - the indices into j for the start of each local row (starts with zero) 3302 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3303 - v - optional values in the matrix 3304 3305 Level: developer 3306 3307 .keywords: matrix, aij, compressed row, sparse 3308 3309 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3310 @*/ 3311 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3312 { 3313 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3314 3315 PetscFunctionBegin; 3316 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3317 if (f) { 3318 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3319 } 3320 PetscFunctionReturn(0); 3321 } 3322 3323 3324 #undef __FUNCT__ 3325 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3326 /*@ 3327 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3328 (upper triangular entries in CSR format) provided by the user. 3329 3330 Collective on MPI_Comm 3331 3332 Input Parameters: 3333 + comm - must be an MPI communicator of size 1 3334 . bs - size of block 3335 . m - number of rows 3336 . n - number of columns 3337 . i - row indices 3338 . j - column indices 3339 - a - matrix values 3340 3341 Output Parameter: 3342 . mat - the matrix 3343 3344 Level: intermediate 3345 3346 Notes: 3347 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3348 once the matrix is destroyed 3349 3350 You cannot set new nonzero locations into this matrix, that will generate an error. 3351 3352 The i and j indices are 0 based 3353 3354 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3355 3356 @*/ 3357 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3358 { 3359 PetscErrorCode ierr; 3360 PetscInt ii; 3361 Mat_SeqBAIJ *baij; 3362 3363 PetscFunctionBegin; 3364 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3365 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3366 3367 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3368 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3369 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3370 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3371 baij = (Mat_SeqBAIJ*)(*mat)->data; 3372 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3373 3374 baij->i = i; 3375 baij->j = j; 3376 baij->a = a; 3377 baij->singlemalloc = PETSC_FALSE; 3378 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3379 baij->free_a = PETSC_FALSE; 3380 baij->free_ij = PETSC_FALSE; 3381 3382 for (ii=0; ii<m; ii++) { 3383 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3384 #if defined(PETSC_USE_DEBUG) 3385 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3386 #endif 3387 } 3388 #if defined(PETSC_USE_DEBUG) 3389 for (ii=0; ii<baij->i[m]; ii++) { 3390 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3391 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3392 } 3393 #endif 3394 3395 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3396 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3397 PetscFunctionReturn(0); 3398 } 3399