1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 118 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,s1; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0]; 146 i2 = 1; 147 idiag += 1; 148 for (i=1; i<m; i++) { 149 v = aa + ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; 153 while (nz--) { 154 idx = (*vi++); 155 x1 = x[idx]; 156 s1 -= v[0]*x1; 157 v += 1; 158 } 159 x[i2] = idiag[0]*s1; 160 idiag += 1; 161 i2 += 1; 162 } 163 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 164 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 165 } 166 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 167 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 168 i2 = 0; 169 mdiag = a->idiag+a->mbs; 170 for (i=0; i<m; i++) { 171 x1 = x[i2]; 172 x[i2] = mdiag[0]*x1; 173 mdiag += 1; 174 i2 += 1; 175 } 176 ierr = PetscLogFlops(m);CHKERRQ(ierr); 177 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 178 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 181 idiag = a->idiag+a->mbs - 1; 182 i2 = m - 1; 183 x1 = x[i2]; 184 x[i2] = idiag[0]*x1; 185 idiag -= 1; 186 i2 -= 1; 187 for (i=m-2; i>=0; i--) { 188 v = aa + (diag[i]+1); 189 vi = aj + diag[i] + 1; 190 nz = ai[i+1] - diag[i] - 1; 191 s1 = x[i2]; 192 while (nz--) { 193 idx = (*vi++); 194 x1 = x[idx]; 195 s1 -= v[0]*x1; 196 v += 1; 197 } 198 x[i2] = idiag[0]*s1; 199 idiag -= 2; 200 i2 -= 1; 201 } 202 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 203 } 204 } else { 205 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 206 } 207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 214 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 215 { 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 217 PetscScalar *x,x1,x2,s1,s2; 218 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 219 PetscErrorCode ierr; 220 PetscInt m = a->mbs,i,i2,nz,idx; 221 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 222 223 PetscFunctionBegin; 224 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 225 its = its*lits; 226 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 227 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 228 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 229 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 230 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 231 232 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 233 234 diag = a->diag; 235 idiag = a->idiag; 236 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 237 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 241 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 242 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 243 i2 = 2; 244 idiag += 4; 245 for (i=1; i<m; i++) { 246 v = aa + 4*ai[i]; 247 vi = aj + ai[i]; 248 nz = diag[i] - ai[i]; 249 s1 = b[i2]; s2 = b[i2+1]; 250 while (nz--) { 251 idx = 2*(*vi++); 252 x1 = x[idx]; x2 = x[1+idx]; 253 s1 -= v[0]*x1 + v[2]*x2; 254 s2 -= v[1]*x1 + v[3]*x2; 255 v += 4; 256 } 257 x[i2] = idiag[0]*s1 + idiag[2]*s2; 258 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 259 idiag += 4; 260 i2 += 2; 261 } 262 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 263 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 264 } 265 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 266 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 267 i2 = 0; 268 mdiag = a->idiag+4*a->mbs; 269 for (i=0; i<m; i++) { 270 x1 = x[i2]; x2 = x[i2+1]; 271 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 272 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 273 mdiag += 4; 274 i2 += 2; 275 } 276 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 277 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 278 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 279 } 280 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 281 idiag = a->idiag+4*a->mbs - 4; 282 i2 = 2*m - 2; 283 x1 = x[i2]; x2 = x[i2+1]; 284 x[i2] = idiag[0]*x1 + idiag[2]*x2; 285 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 286 idiag -= 4; 287 i2 -= 2; 288 for (i=m-2; i>=0; i--) { 289 v = aa + 4*(diag[i]+1); 290 vi = aj + diag[i] + 1; 291 nz = ai[i+1] - diag[i] - 1; 292 s1 = x[i2]; s2 = x[i2+1]; 293 while (nz--) { 294 idx = 2*(*vi++); 295 x1 = x[idx]; x2 = x[1+idx]; 296 s1 -= v[0]*x1 + v[2]*x2; 297 s2 -= v[1]*x1 + v[3]*x2; 298 v += 4; 299 } 300 x[i2] = idiag[0]*s1 + idiag[2]*s2; 301 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 302 idiag -= 4; 303 i2 -= 2; 304 } 305 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 306 } 307 } else { 308 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 309 } 310 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 311 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 317 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 318 { 319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 320 PetscScalar *x,x1,x2,x3,s1,s2,s3; 321 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 322 PetscErrorCode ierr; 323 PetscInt m = a->mbs,i,i2,nz,idx; 324 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 325 326 PetscFunctionBegin; 327 its = its*lits; 328 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 329 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 330 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 331 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 332 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 333 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 334 335 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 336 337 diag = a->diag; 338 idiag = a->idiag; 339 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 341 342 if (flag & SOR_ZERO_INITIAL_GUESS) { 343 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 344 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 345 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 346 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 347 i2 = 3; 348 idiag += 9; 349 for (i=1; i<m; i++) { 350 v = aa + 9*ai[i]; 351 vi = aj + ai[i]; 352 nz = diag[i] - ai[i]; 353 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 354 while (nz--) { 355 idx = 3*(*vi++); 356 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 357 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 358 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 359 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 360 v += 9; 361 } 362 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 363 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 364 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 365 idiag += 9; 366 i2 += 3; 367 } 368 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 369 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 370 } 371 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 372 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 373 i2 = 0; 374 mdiag = a->idiag+9*a->mbs; 375 for (i=0; i<m; i++) { 376 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 377 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 378 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 379 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 380 mdiag += 9; 381 i2 += 3; 382 } 383 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 384 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 385 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 386 } 387 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 388 idiag = a->idiag+9*a->mbs - 9; 389 i2 = 3*m - 3; 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 391 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 392 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 393 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 394 idiag -= 9; 395 i2 -= 3; 396 for (i=m-2; i>=0; i--) { 397 v = aa + 9*(diag[i]+1); 398 vi = aj + diag[i] + 1; 399 nz = ai[i+1] - diag[i] - 1; 400 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 401 while (nz--) { 402 idx = 3*(*vi++); 403 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 404 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 405 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 406 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 407 v += 9; 408 } 409 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 410 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 411 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 412 idiag -= 9; 413 i2 -= 3; 414 } 415 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 416 } 417 } else { 418 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 419 } 420 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 421 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 427 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 428 { 429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 430 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 431 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 432 PetscErrorCode ierr; 433 PetscInt m = a->mbs,i,i2,nz,idx; 434 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 435 436 PetscFunctionBegin; 437 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 438 its = its*lits; 439 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 440 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 441 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 442 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 443 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 444 445 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 446 447 diag = a->diag; 448 idiag = a->idiag; 449 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 451 452 if (flag & SOR_ZERO_INITIAL_GUESS) { 453 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 454 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 455 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 456 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 457 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 458 i2 = 4; 459 idiag += 16; 460 for (i=1; i<m; i++) { 461 v = aa + 16*ai[i]; 462 vi = aj + ai[i]; 463 nz = diag[i] - ai[i]; 464 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 465 while (nz--) { 466 idx = 4*(*vi++); 467 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 468 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 469 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 470 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 471 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 472 v += 16; 473 } 474 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 475 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 476 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 477 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 478 idiag += 16; 479 i2 += 4; 480 } 481 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 482 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 483 } 484 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 485 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 486 i2 = 0; 487 mdiag = a->idiag+16*a->mbs; 488 for (i=0; i<m; i++) { 489 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 490 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 491 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 492 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 493 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 494 mdiag += 16; 495 i2 += 4; 496 } 497 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 498 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 499 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 500 } 501 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 502 idiag = a->idiag+16*a->mbs - 16; 503 i2 = 4*m - 4; 504 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 505 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 506 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 507 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 508 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 509 idiag -= 16; 510 i2 -= 4; 511 for (i=m-2; i>=0; i--) { 512 v = aa + 16*(diag[i]+1); 513 vi = aj + diag[i] + 1; 514 nz = ai[i+1] - diag[i] - 1; 515 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 516 while (nz--) { 517 idx = 4*(*vi++); 518 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 519 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 520 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 521 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 522 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 523 v += 16; 524 } 525 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 526 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 527 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 528 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 529 idiag -= 16; 530 i2 -= 4; 531 } 532 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 533 } 534 } else { 535 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 536 } 537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 538 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 544 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 545 { 546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 547 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 548 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 549 PetscErrorCode ierr; 550 PetscInt m = a->mbs,i,i2,nz,idx; 551 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 552 553 PetscFunctionBegin; 554 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 555 its = its*lits; 556 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 557 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 558 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 559 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 560 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 561 562 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 563 564 diag = a->diag; 565 idiag = a->idiag; 566 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 567 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 568 569 if (flag & SOR_ZERO_INITIAL_GUESS) { 570 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 571 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 572 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 573 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 574 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 575 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 576 i2 = 5; 577 idiag += 25; 578 for (i=1; i<m; i++) { 579 v = aa + 25*ai[i]; 580 vi = aj + ai[i]; 581 nz = diag[i] - ai[i]; 582 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 583 while (nz--) { 584 idx = 5*(*vi++); 585 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 586 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 587 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 588 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 589 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 590 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 591 v += 25; 592 } 593 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 594 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 595 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 596 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 597 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 598 idiag += 25; 599 i2 += 5; 600 } 601 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 602 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 603 } 604 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 605 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 606 i2 = 0; 607 mdiag = a->idiag+25*a->mbs; 608 for (i=0; i<m; i++) { 609 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 610 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 611 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 612 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 613 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 614 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 615 mdiag += 25; 616 i2 += 5; 617 } 618 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 619 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 620 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 621 } 622 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 623 idiag = a->idiag+25*a->mbs - 25; 624 i2 = 5*m - 5; 625 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 626 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 627 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 628 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 629 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 630 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 631 idiag -= 25; 632 i2 -= 5; 633 for (i=m-2; i>=0; i--) { 634 v = aa + 25*(diag[i]+1); 635 vi = aj + diag[i] + 1; 636 nz = ai[i+1] - diag[i] - 1; 637 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 638 while (nz--) { 639 idx = 5*(*vi++); 640 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 641 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 642 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 643 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 644 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 645 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 646 v += 25; 647 } 648 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 649 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 650 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 651 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 652 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 653 idiag -= 25; 654 i2 -= 5; 655 } 656 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 657 } 658 } else { 659 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 660 } 661 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 662 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 668 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 669 { 670 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 671 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 672 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 673 PetscErrorCode ierr; 674 PetscInt m = a->mbs,i,i2,nz,idx; 675 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 676 677 PetscFunctionBegin; 678 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 679 its = its*lits; 680 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 681 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 682 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 683 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 684 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 685 686 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 687 688 diag = a->diag; 689 idiag = a->idiag; 690 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 691 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 692 693 if (flag & SOR_ZERO_INITIAL_GUESS) { 694 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 695 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 696 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 697 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 698 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 699 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 700 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 701 i2 = 6; 702 idiag += 36; 703 for (i=1; i<m; i++) { 704 v = aa + 36*ai[i]; 705 vi = aj + ai[i]; 706 nz = diag[i] - ai[i]; 707 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 708 while (nz--) { 709 idx = 6*(*vi++); 710 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 711 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 712 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 713 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 714 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 715 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 716 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 717 v += 36; 718 } 719 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 720 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 721 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 722 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 723 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 724 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 725 idiag += 36; 726 i2 += 6; 727 } 728 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 729 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 730 } 731 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 732 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 733 i2 = 0; 734 mdiag = a->idiag+36*a->mbs; 735 for (i=0; i<m; i++) { 736 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 737 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 738 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 739 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 740 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 741 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 742 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 743 mdiag += 36; 744 i2 += 6; 745 } 746 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 747 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 748 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 749 } 750 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 751 idiag = a->idiag+36*a->mbs - 36; 752 i2 = 6*m - 6; 753 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 754 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 755 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 756 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 757 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 758 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 759 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 760 idiag -= 36; 761 i2 -= 6; 762 for (i=m-2; i>=0; i--) { 763 v = aa + 36*(diag[i]+1); 764 vi = aj + diag[i] + 1; 765 nz = ai[i+1] - diag[i] - 1; 766 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 767 while (nz--) { 768 idx = 6*(*vi++); 769 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 770 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 771 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 772 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 773 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 774 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 775 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 776 v += 36; 777 } 778 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 779 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 780 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 781 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 782 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 783 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 784 idiag -= 36; 785 i2 -= 6; 786 } 787 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 788 } 789 } else { 790 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 791 } 792 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 793 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 799 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 800 { 801 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 802 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 803 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 804 PetscErrorCode ierr; 805 PetscInt m = a->mbs,i,i2,nz,idx; 806 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 807 808 PetscFunctionBegin; 809 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 810 its = its*lits; 811 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 812 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 813 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 814 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 815 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 816 817 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 818 819 diag = a->diag; 820 idiag = a->idiag; 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 823 824 if (flag & SOR_ZERO_INITIAL_GUESS) { 825 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 826 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 827 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 828 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 829 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 830 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 831 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 832 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 833 i2 = 7; 834 idiag += 49; 835 for (i=1; i<m; i++) { 836 v = aa + 49*ai[i]; 837 vi = aj + ai[i]; 838 nz = diag[i] - ai[i]; 839 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 843 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 844 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 845 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 846 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 847 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 848 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 849 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 850 v += 49; 851 } 852 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 853 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 854 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 855 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 856 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 857 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 858 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 859 idiag += 49; 860 i2 += 7; 861 } 862 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 863 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 864 } 865 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 866 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 867 i2 = 0; 868 mdiag = a->idiag+49*a->mbs; 869 for (i=0; i<m; i++) { 870 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 871 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 872 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 873 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 874 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 875 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 876 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 877 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 878 mdiag += 36; 879 i2 += 6; 880 } 881 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 882 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 883 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 884 } 885 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 886 idiag = a->idiag+49*a->mbs - 49; 887 i2 = 7*m - 7; 888 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 889 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 890 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 891 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 892 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 893 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 894 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 895 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 896 idiag -= 49; 897 i2 -= 7; 898 for (i=m-2; i>=0; i--) { 899 v = aa + 49*(diag[i]+1); 900 vi = aj + diag[i] + 1; 901 nz = ai[i+1] - diag[i] - 1; 902 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 903 while (nz--) { 904 idx = 7*(*vi++); 905 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 906 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 907 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 908 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 909 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 910 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 911 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 912 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 913 v += 49; 914 } 915 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 916 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 917 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 918 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 919 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 920 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 921 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 922 idiag -= 49; 923 i2 -= 7; 924 } 925 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 926 } 927 } else { 928 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 929 } 930 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 931 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 935 /* 936 Special version for direct calls from Fortran (Used in PETSc-fun3d) 937 */ 938 #if defined(PETSC_HAVE_FORTRAN_CAPS) 939 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 940 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 941 #define matsetvaluesblocked4_ matsetvaluesblocked4 942 #endif 943 944 EXTERN_C_BEGIN 945 #undef __FUNCT__ 946 #define __FUNCT__ "matsetvaluesblocked4_" 947 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 948 { 949 Mat A = *AA; 950 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 951 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 952 PetscInt *ai=a->i,*ailen=a->ilen; 953 PetscInt *aj=a->j,stepval,lastcol = -1; 954 const PetscScalar *value = v; 955 MatScalar *ap,*aa = a->a,*bap; 956 957 PetscFunctionBegin; 958 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 959 stepval = (n-1)*4; 960 for (k=0; k<m; k++) { /* loop over added rows */ 961 row = im[k]; 962 rp = aj + ai[row]; 963 ap = aa + 16*ai[row]; 964 nrow = ailen[row]; 965 low = 0; 966 high = nrow; 967 for (l=0; l<n; l++) { /* loop over added columns */ 968 col = in[l]; 969 if (col <= lastcol) low = 0; else high = nrow; 970 lastcol = col; 971 value = v + k*(stepval+4 + l)*4; 972 while (high-low > 7) { 973 t = (low+high)/2; 974 if (rp[t] > col) high = t; 975 else low = t; 976 } 977 for (i=low; i<high; i++) { 978 if (rp[i] > col) break; 979 if (rp[i] == col) { 980 bap = ap + 16*i; 981 for (ii=0; ii<4; ii++,value+=stepval) { 982 for (jj=ii; jj<16; jj+=4) { 983 bap[jj] += *value++; 984 } 985 } 986 goto noinsert2; 987 } 988 } 989 N = nrow++ - 1; 990 high++; /* added new column index thus must search to one higher than before */ 991 /* shift up all the later entries in this row */ 992 for (ii=N; ii>=i; ii--) { 993 rp[ii+1] = rp[ii]; 994 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 995 } 996 if (N >= i) { 997 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 998 } 999 rp[i] = col; 1000 bap = ap + 16*i; 1001 for (ii=0; ii<4; ii++,value+=stepval) { 1002 for (jj=ii; jj<16; jj+=4) { 1003 bap[jj] = *value++; 1004 } 1005 } 1006 noinsert2:; 1007 low = i; 1008 } 1009 ailen[row] = nrow; 1010 } 1011 PetscFunctionReturnVoid(); 1012 } 1013 EXTERN_C_END 1014 1015 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1016 #define matsetvalues4_ MATSETVALUES4 1017 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1018 #define matsetvalues4_ matsetvalues4 1019 #endif 1020 1021 EXTERN_C_BEGIN 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatSetValues4_" 1024 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1025 { 1026 Mat A = *AA; 1027 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1028 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1029 PetscInt *ai=a->i,*ailen=a->ilen; 1030 PetscInt *aj=a->j,brow,bcol; 1031 PetscInt ridx,cidx,lastcol = -1; 1032 MatScalar *ap,value,*aa=a->a,*bap; 1033 1034 PetscFunctionBegin; 1035 for (k=0; k<m; k++) { /* loop over added rows */ 1036 row = im[k]; brow = row/4; 1037 rp = aj + ai[brow]; 1038 ap = aa + 16*ai[brow]; 1039 nrow = ailen[brow]; 1040 low = 0; 1041 high = nrow; 1042 for (l=0; l<n; l++) { /* loop over added columns */ 1043 col = in[l]; bcol = col/4; 1044 ridx = row % 4; cidx = col % 4; 1045 value = v[l + k*n]; 1046 if (col <= lastcol) low = 0; else high = nrow; 1047 lastcol = col; 1048 while (high-low > 7) { 1049 t = (low+high)/2; 1050 if (rp[t] > bcol) high = t; 1051 else low = t; 1052 } 1053 for (i=low; i<high; i++) { 1054 if (rp[i] > bcol) break; 1055 if (rp[i] == bcol) { 1056 bap = ap + 16*i + 4*cidx + ridx; 1057 *bap += value; 1058 goto noinsert1; 1059 } 1060 } 1061 N = nrow++ - 1; 1062 high++; /* added new column thus must search to one higher than before */ 1063 /* shift up all the later entries in this row */ 1064 for (ii=N; ii>=i; ii--) { 1065 rp[ii+1] = rp[ii]; 1066 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1067 } 1068 if (N>=i) { 1069 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1070 } 1071 rp[i] = bcol; 1072 ap[16*i + 4*cidx + ridx] = value; 1073 noinsert1:; 1074 low = i; 1075 } 1076 ailen[brow] = nrow; 1077 } 1078 PetscFunctionReturnVoid(); 1079 } 1080 EXTERN_C_END 1081 1082 /* UGLY, ugly, ugly 1083 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 1084 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 1085 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 1086 converts the entries into single precision and then calls ..._MatScalar() to put them 1087 into the single precision data structures. 1088 */ 1089 #if defined(PETSC_USE_MAT_SINGLE) 1090 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 1091 #else 1092 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 1093 #endif 1094 1095 #define CHUNKSIZE 10 1096 1097 /* 1098 Checks for missing diagonals 1099 */ 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1102 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1103 { 1104 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt *diag,*jj = a->j,i; 1107 1108 PetscFunctionBegin; 1109 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1110 *missing = PETSC_FALSE; 1111 diag = a->diag; 1112 for (i=0; i<a->mbs; i++) { 1113 if (jj[diag[i]] != i) { 1114 *missing = PETSC_TRUE; 1115 if (d) *d = i; 1116 } 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1124 { 1125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1126 PetscErrorCode ierr; 1127 PetscInt i,j,m = a->mbs; 1128 1129 PetscFunctionBegin; 1130 if (!a->diag) { 1131 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1132 } 1133 for (i=0; i<m; i++) { 1134 a->diag[i] = a->i[i+1]; 1135 for (j=a->i[i]; j<a->i[i+1]; j++) { 1136 if (a->j[j] == i) { 1137 a->diag[i] = j; 1138 break; 1139 } 1140 } 1141 } 1142 PetscFunctionReturn(0); 1143 } 1144 1145 1146 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1150 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1151 { 1152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 1155 PetscInt *tia, *tja; 1156 1157 PetscFunctionBegin; 1158 *nn = n; 1159 if (!ia) PetscFunctionReturn(0); 1160 if (symmetric) { 1161 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1162 } else { 1163 tia = a->i; tja = a->j; 1164 } 1165 1166 if (!blockcompressed && bs > 1) { 1167 (*nn) *= bs; 1168 nbs = bs; 1169 /* malloc & create the natural set of indices */ 1170 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1171 if (n) { 1172 (*ia)[0] = 0; 1173 for (j=1; j<bs; j++) { 1174 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1175 } 1176 } 1177 1178 for (i=1; i<n; i++) { 1179 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1180 for (j=1; j<bs; j++) { 1181 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1182 } 1183 } 1184 if (n) { 1185 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1186 } 1187 1188 if (ja) { 1189 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1190 cnt = 0; 1191 for (i=0; i<n; i++) { 1192 for (j=0; j<bs; j++) { 1193 for (k=tia[i]; k<tia[i+1]; k++) { 1194 for (l=0; l<bs; l++) { 1195 (*ja)[cnt++] = bs*tja[k] + l; 1196 } 1197 } 1198 } 1199 } 1200 } 1201 1202 n *= bs; 1203 nz *= bs*bs; 1204 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1205 ierr = PetscFree(tia);CHKERRQ(ierr); 1206 ierr = PetscFree(tja);CHKERRQ(ierr); 1207 } 1208 } else { 1209 *ia = tia; 1210 if (ja) *ja = tja; 1211 } 1212 if (oshift == 1) { 1213 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1214 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1221 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1222 { 1223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 PetscInt i,n = a->mbs,nz = a->i[n]; 1226 1227 PetscFunctionBegin; 1228 if (!ia) PetscFunctionReturn(0); 1229 if (!blockcompressed && A->rmap.bs > 1) { 1230 ierr = PetscFree(*ia);CHKERRQ(ierr); 1231 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1232 } else if (symmetric) { 1233 ierr = PetscFree(*ia);CHKERRQ(ierr); 1234 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1235 } else if (oshift == 1) { /* blockcompressed */ 1236 for (i=0; i<n+1; i++) a->i[i]--; 1237 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1244 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 #if defined(PETSC_USE_LOG) 1251 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 1252 #endif 1253 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1254 if (a->row) { 1255 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1256 } 1257 if (a->col) { 1258 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1259 } 1260 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1261 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1262 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1263 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1264 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1265 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1266 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1267 #if defined(PETSC_USE_MAT_SINGLE) 1268 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 1269 #endif 1270 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1271 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1272 1273 ierr = PetscFree(a);CHKERRQ(ierr); 1274 1275 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1288 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1289 { 1290 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1291 PetscErrorCode ierr; 1292 1293 PetscFunctionBegin; 1294 switch (op) { 1295 case MAT_ROW_ORIENTED: 1296 a->roworiented = flg; 1297 break; 1298 case MAT_KEEP_ZEROED_ROWS: 1299 a->keepzeroedrows = flg; 1300 break; 1301 case MAT_NEW_NONZERO_LOCATIONS: 1302 a->nonew = (flg ? 0 : 1); 1303 break; 1304 case MAT_NEW_NONZERO_LOCATION_ERR: 1305 a->nonew = (flg ? -1 : 0); 1306 break; 1307 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1308 a->nonew = (flg ? -2 : 0); 1309 break; 1310 case MAT_NEW_DIAGONALS: 1311 case MAT_IGNORE_OFF_PROC_ENTRIES: 1312 case MAT_USE_HASH_TABLE: 1313 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1314 break; 1315 case MAT_SYMMETRIC: 1316 case MAT_STRUCTURALLY_SYMMETRIC: 1317 case MAT_HERMITIAN: 1318 case MAT_SYMMETRY_ETERNAL: 1319 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1320 break; 1321 default: 1322 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1323 } 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1329 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1330 { 1331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1332 PetscErrorCode ierr; 1333 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1334 MatScalar *aa,*aa_i; 1335 PetscScalar *v_i; 1336 1337 PetscFunctionBegin; 1338 bs = A->rmap.bs; 1339 ai = a->i; 1340 aj = a->j; 1341 aa = a->a; 1342 bs2 = a->bs2; 1343 1344 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1345 1346 bn = row/bs; /* Block number */ 1347 bp = row % bs; /* Block Position */ 1348 M = ai[bn+1] - ai[bn]; 1349 *nz = bs*M; 1350 1351 if (v) { 1352 *v = 0; 1353 if (*nz) { 1354 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1355 for (i=0; i<M; i++) { /* for each block in the block row */ 1356 v_i = *v + i*bs; 1357 aa_i = aa + bs2*(ai[bn] + i); 1358 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1359 } 1360 } 1361 } 1362 1363 if (idx) { 1364 *idx = 0; 1365 if (*nz) { 1366 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1367 for (i=0; i<M; i++) { /* for each block in the block row */ 1368 idx_i = *idx + i*bs; 1369 itmp = bs*aj[ai[bn] + i]; 1370 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1371 } 1372 } 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1379 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1380 { 1381 PetscErrorCode ierr; 1382 1383 PetscFunctionBegin; 1384 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1385 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1391 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1392 { 1393 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1394 Mat C; 1395 PetscErrorCode ierr; 1396 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1397 PetscInt *rows,*cols,bs2=a->bs2; 1398 PetscScalar *array; 1399 1400 PetscFunctionBegin; 1401 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1402 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1403 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1404 1405 #if defined(PETSC_USE_MAT_SINGLE) 1406 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1407 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1408 #else 1409 array = a->a; 1410 #endif 1411 1412 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1413 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1414 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1415 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1416 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1417 ierr = PetscFree(col);CHKERRQ(ierr); 1418 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1419 cols = rows + bs; 1420 for (i=0; i<mbs; i++) { 1421 cols[0] = i*bs; 1422 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1423 len = ai[i+1] - ai[i]; 1424 for (j=0; j<len; j++) { 1425 rows[0] = (*aj++)*bs; 1426 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1427 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1428 array += bs2; 1429 } 1430 } 1431 ierr = PetscFree(rows);CHKERRQ(ierr); 1432 #if defined(PETSC_USE_MAT_SINGLE) 1433 ierr = PetscFree(array); 1434 #endif 1435 1436 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1437 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1438 1439 if (B) { 1440 *B = C; 1441 } else { 1442 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1449 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1450 { 1451 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1452 PetscErrorCode ierr; 1453 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1454 int fd; 1455 PetscScalar *aa; 1456 FILE *file; 1457 1458 PetscFunctionBegin; 1459 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1460 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1461 col_lens[0] = MAT_FILE_COOKIE; 1462 1463 col_lens[1] = A->rmap.N; 1464 col_lens[2] = A->cmap.n; 1465 col_lens[3] = a->nz*bs2; 1466 1467 /* store lengths of each row and write (including header) to file */ 1468 count = 0; 1469 for (i=0; i<a->mbs; i++) { 1470 for (j=0; j<bs; j++) { 1471 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1472 } 1473 } 1474 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1475 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1476 1477 /* store column indices (zero start index) */ 1478 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1479 count = 0; 1480 for (i=0; i<a->mbs; i++) { 1481 for (j=0; j<bs; j++) { 1482 for (k=a->i[i]; k<a->i[i+1]; k++) { 1483 for (l=0; l<bs; l++) { 1484 jj[count++] = bs*a->j[k] + l; 1485 } 1486 } 1487 } 1488 } 1489 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1490 ierr = PetscFree(jj);CHKERRQ(ierr); 1491 1492 /* store nonzero values */ 1493 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1494 count = 0; 1495 for (i=0; i<a->mbs; i++) { 1496 for (j=0; j<bs; j++) { 1497 for (k=a->i[i]; k<a->i[i+1]; k++) { 1498 for (l=0; l<bs; l++) { 1499 aa[count++] = a->a[bs2*k + l*bs + j]; 1500 } 1501 } 1502 } 1503 } 1504 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1505 ierr = PetscFree(aa);CHKERRQ(ierr); 1506 1507 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1508 if (file) { 1509 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1516 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1517 { 1518 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1519 PetscErrorCode ierr; 1520 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1521 PetscViewerFormat format; 1522 1523 PetscFunctionBegin; 1524 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1525 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1526 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1527 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1528 Mat aij; 1529 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1530 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1531 ierr = MatDestroy(aij);CHKERRQ(ierr); 1532 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1533 PetscFunctionReturn(0); 1534 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1535 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1536 for (i=0; i<a->mbs; i++) { 1537 for (j=0; j<bs; j++) { 1538 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1539 for (k=a->i[i]; k<a->i[i+1]; k++) { 1540 for (l=0; l<bs; l++) { 1541 #if defined(PETSC_USE_COMPLEX) 1542 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1543 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1544 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1547 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1548 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1550 } 1551 #else 1552 if (a->a[bs2*k + l*bs + j] != 0.0) { 1553 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1554 } 1555 #endif 1556 } 1557 } 1558 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1559 } 1560 } 1561 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1562 } else { 1563 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1564 for (i=0; i<a->mbs; i++) { 1565 for (j=0; j<bs; j++) { 1566 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1567 for (k=a->i[i]; k<a->i[i+1]; k++) { 1568 for (l=0; l<bs; l++) { 1569 #if defined(PETSC_USE_COMPLEX) 1570 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1571 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1572 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1573 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1574 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1575 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1576 } else { 1577 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1578 } 1579 #else 1580 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1581 #endif 1582 } 1583 } 1584 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1585 } 1586 } 1587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1588 } 1589 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1595 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1596 { 1597 Mat A = (Mat) Aa; 1598 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1599 PetscErrorCode ierr; 1600 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1601 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1602 MatScalar *aa; 1603 PetscViewer viewer; 1604 1605 PetscFunctionBegin; 1606 1607 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1608 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1609 1610 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1611 1612 /* loop over matrix elements drawing boxes */ 1613 color = PETSC_DRAW_BLUE; 1614 for (i=0,row=0; i<mbs; i++,row+=bs) { 1615 for (j=a->i[i]; j<a->i[i+1]; j++) { 1616 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1617 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1618 aa = a->a + j*bs2; 1619 for (k=0; k<bs; k++) { 1620 for (l=0; l<bs; l++) { 1621 if (PetscRealPart(*aa++) >= 0.) continue; 1622 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1623 } 1624 } 1625 } 1626 } 1627 color = PETSC_DRAW_CYAN; 1628 for (i=0,row=0; i<mbs; i++,row+=bs) { 1629 for (j=a->i[i]; j<a->i[i+1]; j++) { 1630 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1631 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1632 aa = a->a + j*bs2; 1633 for (k=0; k<bs; k++) { 1634 for (l=0; l<bs; l++) { 1635 if (PetscRealPart(*aa++) != 0.) continue; 1636 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1637 } 1638 } 1639 } 1640 } 1641 1642 color = PETSC_DRAW_RED; 1643 for (i=0,row=0; i<mbs; i++,row+=bs) { 1644 for (j=a->i[i]; j<a->i[i+1]; j++) { 1645 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1646 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1647 aa = a->a + j*bs2; 1648 for (k=0; k<bs; k++) { 1649 for (l=0; l<bs; l++) { 1650 if (PetscRealPart(*aa++) <= 0.) continue; 1651 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1652 } 1653 } 1654 } 1655 } 1656 PetscFunctionReturn(0); 1657 } 1658 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1661 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1662 { 1663 PetscErrorCode ierr; 1664 PetscReal xl,yl,xr,yr,w,h; 1665 PetscDraw draw; 1666 PetscTruth isnull; 1667 1668 PetscFunctionBegin; 1669 1670 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1671 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1672 1673 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1674 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1675 xr += w; yr += h; xl = -w; yl = -h; 1676 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1677 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1678 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatView_SeqBAIJ" 1684 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1685 { 1686 PetscErrorCode ierr; 1687 PetscTruth iascii,isbinary,isdraw; 1688 1689 PetscFunctionBegin; 1690 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1691 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1692 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1693 if (iascii){ 1694 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1695 } else if (isbinary) { 1696 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1697 } else if (isdraw) { 1698 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1699 } else { 1700 Mat B; 1701 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1702 ierr = MatView(B,viewer);CHKERRQ(ierr); 1703 ierr = MatDestroy(B);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1711 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1712 { 1713 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1714 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1715 PetscInt *ai = a->i,*ailen = a->ilen; 1716 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1717 MatScalar *ap,*aa = a->a; 1718 1719 PetscFunctionBegin; 1720 for (k=0; k<m; k++) { /* loop over rows */ 1721 row = im[k]; brow = row/bs; 1722 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1723 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1724 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1725 nrow = ailen[brow]; 1726 for (l=0; l<n; l++) { /* loop over columns */ 1727 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1728 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1729 col = in[l] ; 1730 bcol = col/bs; 1731 cidx = col%bs; 1732 ridx = row%bs; 1733 high = nrow; 1734 low = 0; /* assume unsorted */ 1735 while (high-low > 5) { 1736 t = (low+high)/2; 1737 if (rp[t] > bcol) high = t; 1738 else low = t; 1739 } 1740 for (i=low; i<high; i++) { 1741 if (rp[i] > bcol) break; 1742 if (rp[i] == bcol) { 1743 *v++ = ap[bs2*i+bs*cidx+ridx]; 1744 goto finished; 1745 } 1746 } 1747 *v++ = 0.0; 1748 finished:; 1749 } 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #if defined(PETSC_USE_MAT_SINGLE) 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1757 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1758 { 1759 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1760 PetscErrorCode ierr; 1761 PetscInt i,N = m*n*b->bs2; 1762 MatScalar *vsingle; 1763 1764 PetscFunctionBegin; 1765 if (N > b->setvalueslen) { 1766 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1767 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1768 b->setvalueslen = N; 1769 } 1770 vsingle = b->setvaluescopy; 1771 for (i=0; i<N; i++) { 1772 vsingle[i] = v[i]; 1773 } 1774 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 #endif 1778 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1782 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1783 { 1784 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1785 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1786 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1787 PetscErrorCode ierr; 1788 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1789 PetscTruth roworiented=a->roworiented; 1790 const MatScalar *value = v; 1791 MatScalar *ap,*aa = a->a,*bap; 1792 1793 PetscFunctionBegin; 1794 if (roworiented) { 1795 stepval = (n-1)*bs; 1796 } else { 1797 stepval = (m-1)*bs; 1798 } 1799 for (k=0; k<m; k++) { /* loop over added rows */ 1800 row = im[k]; 1801 if (row < 0) continue; 1802 #if defined(PETSC_USE_DEBUG) 1803 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1804 #endif 1805 rp = aj + ai[row]; 1806 ap = aa + bs2*ai[row]; 1807 rmax = imax[row]; 1808 nrow = ailen[row]; 1809 low = 0; 1810 high = nrow; 1811 for (l=0; l<n; l++) { /* loop over added columns */ 1812 if (in[l] < 0) continue; 1813 #if defined(PETSC_USE_DEBUG) 1814 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1815 #endif 1816 col = in[l]; 1817 if (roworiented) { 1818 value = v + k*(stepval+bs)*bs + l*bs; 1819 } else { 1820 value = v + l*(stepval+bs)*bs + k*bs; 1821 } 1822 if (col <= lastcol) low = 0; else high = nrow; 1823 lastcol = col; 1824 while (high-low > 7) { 1825 t = (low+high)/2; 1826 if (rp[t] > col) high = t; 1827 else low = t; 1828 } 1829 for (i=low; i<high; i++) { 1830 if (rp[i] > col) break; 1831 if (rp[i] == col) { 1832 bap = ap + bs2*i; 1833 if (roworiented) { 1834 if (is == ADD_VALUES) { 1835 for (ii=0; ii<bs; ii++,value+=stepval) { 1836 for (jj=ii; jj<bs2; jj+=bs) { 1837 bap[jj] += *value++; 1838 } 1839 } 1840 } else { 1841 for (ii=0; ii<bs; ii++,value+=stepval) { 1842 for (jj=ii; jj<bs2; jj+=bs) { 1843 bap[jj] = *value++; 1844 } 1845 } 1846 } 1847 } else { 1848 if (is == ADD_VALUES) { 1849 for (ii=0; ii<bs; ii++,value+=stepval) { 1850 for (jj=0; jj<bs; jj++) { 1851 *bap++ += *value++; 1852 } 1853 } 1854 } else { 1855 for (ii=0; ii<bs; ii++,value+=stepval) { 1856 for (jj=0; jj<bs; jj++) { 1857 *bap++ = *value++; 1858 } 1859 } 1860 } 1861 } 1862 goto noinsert2; 1863 } 1864 } 1865 if (nonew == 1) goto noinsert2; 1866 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1867 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1868 N = nrow++ - 1; high++; 1869 /* shift up all the later entries in this row */ 1870 for (ii=N; ii>=i; ii--) { 1871 rp[ii+1] = rp[ii]; 1872 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1873 } 1874 if (N >= i) { 1875 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1876 } 1877 rp[i] = col; 1878 bap = ap + bs2*i; 1879 if (roworiented) { 1880 for (ii=0; ii<bs; ii++,value+=stepval) { 1881 for (jj=ii; jj<bs2; jj+=bs) { 1882 bap[jj] = *value++; 1883 } 1884 } 1885 } else { 1886 for (ii=0; ii<bs; ii++,value+=stepval) { 1887 for (jj=0; jj<bs; jj++) { 1888 *bap++ = *value++; 1889 } 1890 } 1891 } 1892 noinsert2:; 1893 low = i; 1894 } 1895 ailen[row] = nrow; 1896 } 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1902 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1903 { 1904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1905 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1906 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1907 PetscErrorCode ierr; 1908 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1909 MatScalar *aa = a->a,*ap; 1910 PetscReal ratio=0.6; 1911 1912 PetscFunctionBegin; 1913 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1914 1915 if (m) rmax = ailen[0]; 1916 for (i=1; i<mbs; i++) { 1917 /* move each row back by the amount of empty slots (fshift) before it*/ 1918 fshift += imax[i-1] - ailen[i-1]; 1919 rmax = PetscMax(rmax,ailen[i]); 1920 if (fshift) { 1921 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1922 N = ailen[i]; 1923 for (j=0; j<N; j++) { 1924 ip[j-fshift] = ip[j]; 1925 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1926 } 1927 } 1928 ai[i] = ai[i-1] + ailen[i-1]; 1929 } 1930 if (mbs) { 1931 fshift += imax[mbs-1] - ailen[mbs-1]; 1932 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1933 } 1934 /* reset ilen and imax for each row */ 1935 for (i=0; i<mbs; i++) { 1936 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1937 } 1938 a->nz = ai[mbs]; 1939 1940 /* diagonals may have moved, so kill the diagonal pointers */ 1941 a->idiagvalid = PETSC_FALSE; 1942 if (fshift && a->diag) { 1943 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1944 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1945 a->diag = 0; 1946 } 1947 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1948 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1949 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1950 a->reallocs = 0; 1951 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1952 1953 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1954 if (a->compressedrow.use){ 1955 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1956 } 1957 1958 A->same_nonzero = PETSC_TRUE; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 /* 1963 This function returns an array of flags which indicate the locations of contiguous 1964 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1965 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1966 Assume: sizes should be long enough to hold all the values. 1967 */ 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1970 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1971 { 1972 PetscInt i,j,k,row; 1973 PetscTruth flg; 1974 1975 PetscFunctionBegin; 1976 for (i=0,j=0; i<n; j++) { 1977 row = idx[i]; 1978 if (row%bs!=0) { /* Not the begining of a block */ 1979 sizes[j] = 1; 1980 i++; 1981 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1982 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1983 i++; 1984 } else { /* Begining of the block, so check if the complete block exists */ 1985 flg = PETSC_TRUE; 1986 for (k=1; k<bs; k++) { 1987 if (row+k != idx[i+k]) { /* break in the block */ 1988 flg = PETSC_FALSE; 1989 break; 1990 } 1991 } 1992 if (flg) { /* No break in the bs */ 1993 sizes[j] = bs; 1994 i+= bs; 1995 } else { 1996 sizes[j] = 1; 1997 i++; 1998 } 1999 } 2000 } 2001 *bs_max = j; 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2007 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2008 { 2009 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 PetscInt i,j,k,count,*rows; 2012 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 2013 PetscScalar zero = 0.0; 2014 MatScalar *aa; 2015 2016 PetscFunctionBegin; 2017 /* Make a copy of the IS and sort it */ 2018 /* allocate memory for rows,sizes */ 2019 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2020 sizes = rows + is_n; 2021 2022 /* copy IS values to rows, and sort them */ 2023 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2024 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2025 if (baij->keepzeroedrows) { 2026 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2027 bs_max = is_n; 2028 A->same_nonzero = PETSC_TRUE; 2029 } else { 2030 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2031 A->same_nonzero = PETSC_FALSE; 2032 } 2033 2034 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2035 row = rows[j]; 2036 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2037 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2038 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2039 if (sizes[i] == bs && !baij->keepzeroedrows) { 2040 if (diag != 0.0) { 2041 if (baij->ilen[row/bs] > 0) { 2042 baij->ilen[row/bs] = 1; 2043 baij->j[baij->i[row/bs]] = row/bs; 2044 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2045 } 2046 /* Now insert all the diagonal values for this bs */ 2047 for (k=0; k<bs; k++) { 2048 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2049 } 2050 } else { /* (diag == 0.0) */ 2051 baij->ilen[row/bs] = 0; 2052 } /* end (diag == 0.0) */ 2053 } else { /* (sizes[i] != bs) */ 2054 #if defined (PETSC_USE_DEBUG) 2055 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2056 #endif 2057 for (k=0; k<count; k++) { 2058 aa[0] = zero; 2059 aa += bs; 2060 } 2061 if (diag != 0.0) { 2062 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2063 } 2064 } 2065 } 2066 2067 ierr = PetscFree(rows);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2074 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2075 { 2076 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2077 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2078 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2079 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 2080 PetscErrorCode ierr; 2081 PetscInt ridx,cidx,bs2=a->bs2; 2082 PetscTruth roworiented=a->roworiented; 2083 MatScalar *ap,value,*aa=a->a,*bap; 2084 2085 PetscFunctionBegin; 2086 for (k=0; k<m; k++) { /* loop over added rows */ 2087 row = im[k]; 2088 brow = row/bs; 2089 if (row < 0) continue; 2090 #if defined(PETSC_USE_DEBUG) 2091 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 2092 #endif 2093 rp = aj + ai[brow]; 2094 ap = aa + bs2*ai[brow]; 2095 rmax = imax[brow]; 2096 nrow = ailen[brow]; 2097 low = 0; 2098 high = nrow; 2099 for (l=0; l<n; l++) { /* loop over added columns */ 2100 if (in[l] < 0) continue; 2101 #if defined(PETSC_USE_DEBUG) 2102 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 2103 #endif 2104 col = in[l]; bcol = col/bs; 2105 ridx = row % bs; cidx = col % bs; 2106 if (roworiented) { 2107 value = v[l + k*n]; 2108 } else { 2109 value = v[k + l*m]; 2110 } 2111 if (col <= lastcol) low = 0; else high = nrow; 2112 lastcol = col; 2113 while (high-low > 7) { 2114 t = (low+high)/2; 2115 if (rp[t] > bcol) high = t; 2116 else low = t; 2117 } 2118 for (i=low; i<high; i++) { 2119 if (rp[i] > bcol) break; 2120 if (rp[i] == bcol) { 2121 bap = ap + bs2*i + bs*cidx + ridx; 2122 if (is == ADD_VALUES) *bap += value; 2123 else *bap = value; 2124 goto noinsert1; 2125 } 2126 } 2127 if (nonew == 1) goto noinsert1; 2128 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2129 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2130 N = nrow++ - 1; high++; 2131 /* shift up all the later entries in this row */ 2132 for (ii=N; ii>=i; ii--) { 2133 rp[ii+1] = rp[ii]; 2134 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2135 } 2136 if (N>=i) { 2137 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2138 } 2139 rp[i] = bcol; 2140 ap[bs2*i + bs*cidx + ridx] = value; 2141 a->nz++; 2142 noinsert1:; 2143 low = i; 2144 } 2145 ailen[brow] = nrow; 2146 } 2147 A->same_nonzero = PETSC_FALSE; 2148 PetscFunctionReturn(0); 2149 } 2150 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2154 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 2155 { 2156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2157 Mat outA; 2158 PetscErrorCode ierr; 2159 PetscTruth row_identity,col_identity; 2160 2161 PetscFunctionBegin; 2162 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2163 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2164 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2165 if (!row_identity || !col_identity) { 2166 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2167 } 2168 2169 outA = inA; 2170 inA->factor = FACTOR_LU; 2171 2172 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2173 2174 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2175 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2176 a->row = row; 2177 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2178 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2179 a->col = col; 2180 2181 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2182 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2183 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2184 2185 /* 2186 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 2187 for ILU(0) factorization with natural ordering 2188 */ 2189 if (inA->rmap.bs < 8) { 2190 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 2191 } else { 2192 if (!a->solve_work) { 2193 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2194 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2195 } 2196 } 2197 2198 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 2199 2200 PetscFunctionReturn(0); 2201 } 2202 2203 EXTERN_C_BEGIN 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2206 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2207 { 2208 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2209 PetscInt i,nz,nbs; 2210 2211 PetscFunctionBegin; 2212 nz = baij->maxnz/baij->bs2; 2213 nbs = baij->nbs; 2214 for (i=0; i<nz; i++) { 2215 baij->j[i] = indices[i]; 2216 } 2217 baij->nz = nz; 2218 for (i=0; i<nbs; i++) { 2219 baij->ilen[i] = baij->imax[i]; 2220 } 2221 2222 PetscFunctionReturn(0); 2223 } 2224 EXTERN_C_END 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2228 /*@ 2229 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2230 in the matrix. 2231 2232 Input Parameters: 2233 + mat - the SeqBAIJ matrix 2234 - indices - the column indices 2235 2236 Level: advanced 2237 2238 Notes: 2239 This can be called if you have precomputed the nonzero structure of the 2240 matrix and want to provide it to the matrix object to improve the performance 2241 of the MatSetValues() operation. 2242 2243 You MUST have set the correct numbers of nonzeros per row in the call to 2244 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2245 2246 MUST be called before any calls to MatSetValues(); 2247 2248 @*/ 2249 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2250 { 2251 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2252 2253 PetscFunctionBegin; 2254 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2255 PetscValidPointer(indices,2); 2256 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2257 if (f) { 2258 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2259 } else { 2260 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2261 } 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2267 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2268 { 2269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2272 PetscReal atmp; 2273 PetscScalar *x,zero = 0.0; 2274 MatScalar *aa; 2275 PetscInt ncols,brow,krow,kcol; 2276 2277 PetscFunctionBegin; 2278 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2279 bs = A->rmap.bs; 2280 aa = a->a; 2281 ai = a->i; 2282 aj = a->j; 2283 mbs = a->mbs; 2284 2285 ierr = VecSet(v,zero);CHKERRQ(ierr); 2286 if (idx) { 2287 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 2288 } 2289 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2290 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2291 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2292 for (i=0; i<mbs; i++) { 2293 ncols = ai[1] - ai[0]; ai++; 2294 brow = bs*i; 2295 for (j=0; j<ncols; j++){ 2296 for (kcol=0; kcol<bs; kcol++){ 2297 for (krow=0; krow<bs; krow++){ 2298 atmp = PetscAbsScalar(*aa);aa++; 2299 row = brow + krow; /* row index */ 2300 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2301 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2302 } 2303 } 2304 aj++; 2305 } 2306 } 2307 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2308 PetscFunctionReturn(0); 2309 } 2310 2311 #undef __FUNCT__ 2312 #define __FUNCT__ "MatCopy_SeqBAIJ" 2313 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2314 { 2315 PetscErrorCode ierr; 2316 2317 PetscFunctionBegin; 2318 /* If the two matrices have the same copy implementation, use fast copy. */ 2319 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2321 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2322 2323 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 2324 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2325 } 2326 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 2327 } else { 2328 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2329 } 2330 PetscFunctionReturn(0); 2331 } 2332 2333 #undef __FUNCT__ 2334 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2335 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2336 { 2337 PetscErrorCode ierr; 2338 2339 PetscFunctionBegin; 2340 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2346 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2347 { 2348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2349 PetscFunctionBegin; 2350 *array = a->a; 2351 PetscFunctionReturn(0); 2352 } 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2356 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2357 { 2358 PetscFunctionBegin; 2359 PetscFunctionReturn(0); 2360 } 2361 2362 #include "petscblaslapack.h" 2363 #undef __FUNCT__ 2364 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2365 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2366 { 2367 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2368 PetscErrorCode ierr; 2369 PetscInt i,bs=Y->rmap.bs,j,bs2; 2370 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2371 2372 PetscFunctionBegin; 2373 if (str == SAME_NONZERO_PATTERN) { 2374 PetscScalar alpha = a; 2375 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2376 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2377 if (y->xtoy && y->XtoY != X) { 2378 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2379 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2380 } 2381 if (!y->xtoy) { /* get xtoy */ 2382 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2383 y->XtoY = X; 2384 } 2385 bs2 = bs*bs; 2386 for (i=0; i<x->nz; i++) { 2387 j = 0; 2388 while (j < bs2){ 2389 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2390 j++; 2391 } 2392 } 2393 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2394 } else { 2395 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2396 } 2397 PetscFunctionReturn(0); 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2402 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2403 { 2404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2405 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2406 PetscScalar *aa = a->a; 2407 2408 PetscFunctionBegin; 2409 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2415 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2416 { 2417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2418 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2419 PetscScalar *aa = a->a; 2420 2421 PetscFunctionBegin; 2422 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 2427 /* -------------------------------------------------------------------*/ 2428 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2429 MatGetRow_SeqBAIJ, 2430 MatRestoreRow_SeqBAIJ, 2431 MatMult_SeqBAIJ_N, 2432 /* 4*/ MatMultAdd_SeqBAIJ_N, 2433 MatMultTranspose_SeqBAIJ, 2434 MatMultTransposeAdd_SeqBAIJ, 2435 MatSolve_SeqBAIJ_N, 2436 0, 2437 0, 2438 /*10*/ 0, 2439 MatLUFactor_SeqBAIJ, 2440 0, 2441 0, 2442 MatTranspose_SeqBAIJ, 2443 /*15*/ MatGetInfo_SeqBAIJ, 2444 MatEqual_SeqBAIJ, 2445 MatGetDiagonal_SeqBAIJ, 2446 MatDiagonalScale_SeqBAIJ, 2447 MatNorm_SeqBAIJ, 2448 /*20*/ 0, 2449 MatAssemblyEnd_SeqBAIJ, 2450 0, 2451 MatSetOption_SeqBAIJ, 2452 MatZeroEntries_SeqBAIJ, 2453 /*25*/ MatZeroRows_SeqBAIJ, 2454 MatLUFactorSymbolic_SeqBAIJ, 2455 MatLUFactorNumeric_SeqBAIJ_N, 2456 MatCholeskyFactorSymbolic_SeqBAIJ, 2457 MatCholeskyFactorNumeric_SeqBAIJ_N, 2458 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2459 MatILUFactorSymbolic_SeqBAIJ, 2460 MatICCFactorSymbolic_SeqBAIJ, 2461 MatGetArray_SeqBAIJ, 2462 MatRestoreArray_SeqBAIJ, 2463 /*35*/ MatDuplicate_SeqBAIJ, 2464 0, 2465 0, 2466 MatILUFactor_SeqBAIJ, 2467 0, 2468 /*40*/ MatAXPY_SeqBAIJ, 2469 MatGetSubMatrices_SeqBAIJ, 2470 MatIncreaseOverlap_SeqBAIJ, 2471 MatGetValues_SeqBAIJ, 2472 MatCopy_SeqBAIJ, 2473 /*45*/ 0, 2474 MatScale_SeqBAIJ, 2475 0, 2476 0, 2477 0, 2478 /*50*/ 0, 2479 MatGetRowIJ_SeqBAIJ, 2480 MatRestoreRowIJ_SeqBAIJ, 2481 0, 2482 0, 2483 /*55*/ 0, 2484 0, 2485 0, 2486 0, 2487 MatSetValuesBlocked_SeqBAIJ, 2488 /*60*/ MatGetSubMatrix_SeqBAIJ, 2489 MatDestroy_SeqBAIJ, 2490 MatView_SeqBAIJ, 2491 0, 2492 0, 2493 /*65*/ 0, 2494 0, 2495 0, 2496 0, 2497 0, 2498 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2499 MatConvert_Basic, 2500 0, 2501 0, 2502 0, 2503 /*75*/ 0, 2504 0, 2505 0, 2506 0, 2507 0, 2508 /*80*/ 0, 2509 0, 2510 0, 2511 0, 2512 MatLoad_SeqBAIJ, 2513 /*85*/ 0, 2514 0, 2515 0, 2516 0, 2517 0, 2518 /*90*/ 0, 2519 0, 2520 0, 2521 0, 2522 0, 2523 /*95*/ 0, 2524 0, 2525 0, 2526 0, 2527 0, 2528 /*100*/0, 2529 0, 2530 0, 2531 0, 2532 0, 2533 /*105*/0, 2534 MatRealPart_SeqBAIJ, 2535 MatImaginaryPart_SeqBAIJ, 2536 0, 2537 0, 2538 /*110*/0, 2539 0, 2540 0, 2541 0, 2542 MatMissingDiagonal_SeqBAIJ 2543 /*115*/ 2544 }; 2545 2546 EXTERN_C_BEGIN 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2549 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2550 { 2551 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2552 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2553 PetscErrorCode ierr; 2554 2555 PetscFunctionBegin; 2556 if (aij->nonew != 1) { 2557 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2558 } 2559 2560 /* allocate space for values if not already there */ 2561 if (!aij->saved_values) { 2562 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2563 } 2564 2565 /* copy values over */ 2566 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2567 PetscFunctionReturn(0); 2568 } 2569 EXTERN_C_END 2570 2571 EXTERN_C_BEGIN 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2574 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2575 { 2576 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2577 PetscErrorCode ierr; 2578 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2579 2580 PetscFunctionBegin; 2581 if (aij->nonew != 1) { 2582 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2583 } 2584 if (!aij->saved_values) { 2585 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2586 } 2587 2588 /* copy values over */ 2589 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2590 PetscFunctionReturn(0); 2591 } 2592 EXTERN_C_END 2593 2594 EXTERN_C_BEGIN 2595 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2596 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2597 EXTERN_C_END 2598 2599 EXTERN_C_BEGIN 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2602 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2603 { 2604 Mat_SeqBAIJ *b; 2605 PetscErrorCode ierr; 2606 PetscInt i,mbs,nbs,bs2,newbs = bs; 2607 PetscTruth flg,skipallocation = PETSC_FALSE; 2608 2609 PetscFunctionBegin; 2610 2611 if (nz == MAT_SKIP_ALLOCATION) { 2612 skipallocation = PETSC_TRUE; 2613 nz = 0; 2614 } 2615 2616 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2617 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2618 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2619 2620 if (nnz && newbs != bs) { 2621 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2622 } 2623 bs = newbs; 2624 2625 B->rmap.bs = B->cmap.bs = bs; 2626 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2627 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2628 2629 B->preallocated = PETSC_TRUE; 2630 2631 mbs = B->rmap.n/bs; 2632 nbs = B->cmap.n/bs; 2633 bs2 = bs*bs; 2634 2635 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2636 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2637 } 2638 2639 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2640 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2641 if (nnz) { 2642 for (i=0; i<mbs; i++) { 2643 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2644 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2645 } 2646 } 2647 2648 b = (Mat_SeqBAIJ*)B->data; 2649 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2650 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2651 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2652 2653 B->ops->solve = MatSolve_SeqBAIJ_Update; 2654 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2655 if (!flg) { 2656 switch (bs) { 2657 case 1: 2658 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2659 B->ops->mult = MatMult_SeqBAIJ_1; 2660 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2661 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2662 break; 2663 case 2: 2664 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2665 B->ops->mult = MatMult_SeqBAIJ_2; 2666 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2667 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2668 break; 2669 case 3: 2670 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2671 B->ops->mult = MatMult_SeqBAIJ_3; 2672 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2673 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2674 break; 2675 case 4: 2676 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2677 B->ops->mult = MatMult_SeqBAIJ_4; 2678 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2679 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2680 break; 2681 case 5: 2682 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2683 B->ops->mult = MatMult_SeqBAIJ_5; 2684 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2685 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2686 break; 2687 case 6: 2688 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2689 B->ops->mult = MatMult_SeqBAIJ_6; 2690 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2691 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2692 break; 2693 case 7: 2694 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2695 B->ops->mult = MatMult_SeqBAIJ_7; 2696 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2697 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2698 break; 2699 default: 2700 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2701 B->ops->mult = MatMult_SeqBAIJ_N; 2702 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2703 break; 2704 } 2705 } 2706 B->rmap.bs = bs; 2707 b->mbs = mbs; 2708 b->nbs = nbs; 2709 if (!skipallocation) { 2710 if (!b->imax) { 2711 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2712 } 2713 /* b->ilen will count nonzeros in each block row so far. */ 2714 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2715 if (!nnz) { 2716 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2717 else if (nz <= 0) nz = 1; 2718 for (i=0; i<mbs; i++) b->imax[i] = nz; 2719 nz = nz*mbs; 2720 } else { 2721 nz = 0; 2722 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2723 } 2724 2725 /* allocate the matrix space */ 2726 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2727 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2728 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2729 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2730 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2731 b->singlemalloc = PETSC_TRUE; 2732 b->i[0] = 0; 2733 for (i=1; i<mbs+1; i++) { 2734 b->i[i] = b->i[i-1] + b->imax[i-1]; 2735 } 2736 b->free_a = PETSC_TRUE; 2737 b->free_ij = PETSC_TRUE; 2738 } else { 2739 b->free_a = PETSC_FALSE; 2740 b->free_ij = PETSC_FALSE; 2741 } 2742 2743 B->rmap.bs = bs; 2744 b->bs2 = bs2; 2745 b->mbs = mbs; 2746 b->nz = 0; 2747 b->maxnz = nz*bs2; 2748 B->info.nz_unneeded = (PetscReal)b->maxnz; 2749 PetscFunctionReturn(0); 2750 } 2751 EXTERN_C_END 2752 2753 /*MC 2754 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2755 block sparse compressed row format. 2756 2757 Options Database Keys: 2758 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2759 2760 Level: beginner 2761 2762 .seealso: MatCreateSeqBAIJ() 2763 M*/ 2764 2765 EXTERN_C_BEGIN 2766 #undef __FUNCT__ 2767 #define __FUNCT__ "MatCreate_SeqBAIJ" 2768 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2769 { 2770 PetscErrorCode ierr; 2771 PetscMPIInt size; 2772 Mat_SeqBAIJ *b; 2773 2774 PetscFunctionBegin; 2775 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2776 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2777 2778 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2779 B->data = (void*)b; 2780 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2781 B->factor = 0; 2782 B->mapping = 0; 2783 b->row = 0; 2784 b->col = 0; 2785 b->icol = 0; 2786 b->reallocs = 0; 2787 b->saved_values = 0; 2788 #if defined(PETSC_USE_MAT_SINGLE) 2789 b->setvalueslen = 0; 2790 b->setvaluescopy = PETSC_NULL; 2791 #endif 2792 2793 b->roworiented = PETSC_TRUE; 2794 b->nonew = 0; 2795 b->diag = 0; 2796 b->solve_work = 0; 2797 b->mult_work = 0; 2798 B->spptr = 0; 2799 B->info.nz_unneeded = (PetscReal)b->maxnz; 2800 b->keepzeroedrows = PETSC_FALSE; 2801 b->xtoy = 0; 2802 b->XtoY = 0; 2803 b->compressedrow.use = PETSC_FALSE; 2804 b->compressedrow.nrows = 0; 2805 b->compressedrow.i = PETSC_NULL; 2806 b->compressedrow.rindex = PETSC_NULL; 2807 b->compressedrow.checked = PETSC_FALSE; 2808 B->same_nonzero = PETSC_FALSE; 2809 2810 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2811 "MatInvertBlockDiagonal_SeqBAIJ", 2812 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2813 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2814 "MatStoreValues_SeqBAIJ", 2815 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2816 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2817 "MatRetrieveValues_SeqBAIJ", 2818 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2820 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2821 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2823 "MatConvert_SeqBAIJ_SeqAIJ", 2824 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2826 "MatConvert_SeqBAIJ_SeqSBAIJ", 2827 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2829 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2830 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2831 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 EXTERN_C_END 2835 2836 #undef __FUNCT__ 2837 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2838 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2839 { 2840 Mat C; 2841 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2842 PetscErrorCode ierr; 2843 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2844 2845 PetscFunctionBegin; 2846 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2847 2848 *B = 0; 2849 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2850 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2851 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2852 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2853 c = (Mat_SeqBAIJ*)C->data; 2854 2855 C->rmap.N = A->rmap.N; 2856 C->cmap.N = A->cmap.N; 2857 C->rmap.bs = A->rmap.bs; 2858 c->bs2 = a->bs2; 2859 c->mbs = a->mbs; 2860 c->nbs = a->nbs; 2861 2862 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2863 for (i=0; i<mbs; i++) { 2864 c->imax[i] = a->imax[i]; 2865 c->ilen[i] = a->ilen[i]; 2866 } 2867 2868 /* allocate the matrix space */ 2869 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2870 c->singlemalloc = PETSC_TRUE; 2871 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2872 if (mbs > 0) { 2873 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2874 if (cpvalues == MAT_COPY_VALUES) { 2875 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2876 } else { 2877 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2878 } 2879 } 2880 c->roworiented = a->roworiented; 2881 c->nonew = a->nonew; 2882 2883 if (a->diag) { 2884 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2885 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2886 for (i=0; i<mbs; i++) { 2887 c->diag[i] = a->diag[i]; 2888 } 2889 } else c->diag = 0; 2890 c->nz = a->nz; 2891 c->maxnz = a->maxnz; 2892 c->solve_work = 0; 2893 c->mult_work = 0; 2894 c->free_a = PETSC_TRUE; 2895 c->free_ij = PETSC_TRUE; 2896 C->preallocated = PETSC_TRUE; 2897 C->assembled = PETSC_TRUE; 2898 2899 c->compressedrow.use = a->compressedrow.use; 2900 c->compressedrow.nrows = a->compressedrow.nrows; 2901 c->compressedrow.checked = a->compressedrow.checked; 2902 if ( a->compressedrow.checked && a->compressedrow.use){ 2903 i = a->compressedrow.nrows; 2904 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2905 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2906 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2907 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2908 } else { 2909 c->compressedrow.use = PETSC_FALSE; 2910 c->compressedrow.i = PETSC_NULL; 2911 c->compressedrow.rindex = PETSC_NULL; 2912 } 2913 C->same_nonzero = A->same_nonzero; 2914 *B = C; 2915 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2916 PetscFunctionReturn(0); 2917 } 2918 2919 #undef __FUNCT__ 2920 #define __FUNCT__ "MatLoad_SeqBAIJ" 2921 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2922 { 2923 Mat_SeqBAIJ *a; 2924 Mat B; 2925 PetscErrorCode ierr; 2926 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2927 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2928 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2929 PetscInt *masked,nmask,tmp,bs2,ishift; 2930 PetscMPIInt size; 2931 int fd; 2932 PetscScalar *aa; 2933 MPI_Comm comm = ((PetscObject)viewer)->comm; 2934 2935 PetscFunctionBegin; 2936 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2937 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2938 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2939 bs2 = bs*bs; 2940 2941 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2942 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2943 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2944 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2945 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2946 M = header[1]; N = header[2]; nz = header[3]; 2947 2948 if (header[3] < 0) { 2949 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2950 } 2951 2952 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2953 2954 /* 2955 This code adds extra rows to make sure the number of rows is 2956 divisible by the blocksize 2957 */ 2958 mbs = M/bs; 2959 extra_rows = bs - M + bs*(mbs); 2960 if (extra_rows == bs) extra_rows = 0; 2961 else mbs++; 2962 if (extra_rows) { 2963 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2964 } 2965 2966 /* read in row lengths */ 2967 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2968 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2969 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2970 2971 /* read in column indices */ 2972 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2973 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2974 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2975 2976 /* loop over row lengths determining block row lengths */ 2977 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2978 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2979 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2980 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2981 masked = mask + mbs; 2982 rowcount = 0; nzcount = 0; 2983 for (i=0; i<mbs; i++) { 2984 nmask = 0; 2985 for (j=0; j<bs; j++) { 2986 kmax = rowlengths[rowcount]; 2987 for (k=0; k<kmax; k++) { 2988 tmp = jj[nzcount++]/bs; 2989 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2990 } 2991 rowcount++; 2992 } 2993 browlengths[i] += nmask; 2994 /* zero out the mask elements we set */ 2995 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2996 } 2997 2998 /* create our matrix */ 2999 ierr = MatCreate(comm,&B); 3000 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3001 ierr = MatSetType(B,type);CHKERRQ(ierr); 3002 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3003 a = (Mat_SeqBAIJ*)B->data; 3004 3005 /* set matrix "i" values */ 3006 a->i[0] = 0; 3007 for (i=1; i<= mbs; i++) { 3008 a->i[i] = a->i[i-1] + browlengths[i-1]; 3009 a->ilen[i-1] = browlengths[i-1]; 3010 } 3011 a->nz = 0; 3012 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3013 3014 /* read in nonzero values */ 3015 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3016 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3017 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3018 3019 /* set "a" and "j" values into matrix */ 3020 nzcount = 0; jcount = 0; 3021 for (i=0; i<mbs; i++) { 3022 nzcountb = nzcount; 3023 nmask = 0; 3024 for (j=0; j<bs; j++) { 3025 kmax = rowlengths[i*bs+j]; 3026 for (k=0; k<kmax; k++) { 3027 tmp = jj[nzcount++]/bs; 3028 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3029 } 3030 } 3031 /* sort the masked values */ 3032 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3033 3034 /* set "j" values into matrix */ 3035 maskcount = 1; 3036 for (j=0; j<nmask; j++) { 3037 a->j[jcount++] = masked[j]; 3038 mask[masked[j]] = maskcount++; 3039 } 3040 /* set "a" values into matrix */ 3041 ishift = bs2*a->i[i]; 3042 for (j=0; j<bs; j++) { 3043 kmax = rowlengths[i*bs+j]; 3044 for (k=0; k<kmax; k++) { 3045 tmp = jj[nzcountb]/bs ; 3046 block = mask[tmp] - 1; 3047 point = jj[nzcountb] - bs*tmp; 3048 idx = ishift + bs2*block + j + bs*point; 3049 a->a[idx] = (MatScalar)aa[nzcountb++]; 3050 } 3051 } 3052 /* zero out the mask elements we set */ 3053 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3054 } 3055 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3056 3057 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3058 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3059 ierr = PetscFree(aa);CHKERRQ(ierr); 3060 ierr = PetscFree(jj);CHKERRQ(ierr); 3061 ierr = PetscFree(mask);CHKERRQ(ierr); 3062 3063 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3064 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3065 ierr = MatView_Private(B);CHKERRQ(ierr); 3066 3067 *A = B; 3068 PetscFunctionReturn(0); 3069 } 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatCreateSeqBAIJ" 3073 /*@C 3074 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3075 compressed row) format. For good matrix assembly performance the 3076 user should preallocate the matrix storage by setting the parameter nz 3077 (or the array nnz). By setting these parameters accurately, performance 3078 during matrix assembly can be increased by more than a factor of 50. 3079 3080 Collective on MPI_Comm 3081 3082 Input Parameters: 3083 + comm - MPI communicator, set to PETSC_COMM_SELF 3084 . bs - size of block 3085 . m - number of rows 3086 . n - number of columns 3087 . nz - number of nonzero blocks per block row (same for all rows) 3088 - nnz - array containing the number of nonzero blocks in the various block rows 3089 (possibly different for each block row) or PETSC_NULL 3090 3091 Output Parameter: 3092 . A - the matrix 3093 3094 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3095 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3096 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3097 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3098 3099 Options Database Keys: 3100 . -mat_no_unroll - uses code that does not unroll the loops in the 3101 block calculations (much slower) 3102 . -mat_block_size - size of the blocks to use 3103 3104 Level: intermediate 3105 3106 Notes: 3107 The number of rows and columns must be divisible by blocksize. 3108 3109 If the nnz parameter is given then the nz parameter is ignored 3110 3111 A nonzero block is any block that as 1 or more nonzeros in it 3112 3113 The block AIJ format is fully compatible with standard Fortran 77 3114 storage. That is, the stored row and column indices can begin at 3115 either one (as in Fortran) or zero. See the users' manual for details. 3116 3117 Specify the preallocated storage with either nz or nnz (not both). 3118 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3119 allocation. For additional details, see the users manual chapter on 3120 matrices. 3121 3122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3123 @*/ 3124 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3125 { 3126 PetscErrorCode ierr; 3127 3128 PetscFunctionBegin; 3129 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3130 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3131 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3132 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3133 PetscFunctionReturn(0); 3134 } 3135 3136 #undef __FUNCT__ 3137 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3138 /*@C 3139 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3140 per row in the matrix. For good matrix assembly performance the 3141 user should preallocate the matrix storage by setting the parameter nz 3142 (or the array nnz). By setting these parameters accurately, performance 3143 during matrix assembly can be increased by more than a factor of 50. 3144 3145 Collective on MPI_Comm 3146 3147 Input Parameters: 3148 + A - the matrix 3149 . bs - size of block 3150 . nz - number of block nonzeros per block row (same for all rows) 3151 - nnz - array containing the number of block nonzeros in the various block rows 3152 (possibly different for each block row) or PETSC_NULL 3153 3154 Options Database Keys: 3155 . -mat_no_unroll - uses code that does not unroll the loops in the 3156 block calculations (much slower) 3157 . -mat_block_size - size of the blocks to use 3158 3159 Level: intermediate 3160 3161 Notes: 3162 If the nnz parameter is given then the nz parameter is ignored 3163 3164 You can call MatGetInfo() to get information on how effective the preallocation was; 3165 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3166 You can also run with the option -info and look for messages with the string 3167 malloc in them to see if additional memory allocation was needed. 3168 3169 The block AIJ format is fully compatible with standard Fortran 77 3170 storage. That is, the stored row and column indices can begin at 3171 either one (as in Fortran) or zero. See the users' manual for details. 3172 3173 Specify the preallocated storage with either nz or nnz (not both). 3174 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3175 allocation. For additional details, see the users manual chapter on 3176 matrices. 3177 3178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3179 @*/ 3180 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3181 { 3182 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3183 3184 PetscFunctionBegin; 3185 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3186 if (f) { 3187 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3188 } 3189 PetscFunctionReturn(0); 3190 } 3191 3192 #undef __FUNCT__ 3193 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3194 /*@ 3195 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3196 (upper triangular entries in CSR format) provided by the user. 3197 3198 Collective on MPI_Comm 3199 3200 Input Parameters: 3201 + comm - must be an MPI communicator of size 1 3202 . bs - size of block 3203 . m - number of rows 3204 . n - number of columns 3205 . i - row indices 3206 . j - column indices 3207 - a - matrix values 3208 3209 Output Parameter: 3210 . mat - the matrix 3211 3212 Level: intermediate 3213 3214 Notes: 3215 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3216 once the matrix is destroyed 3217 3218 You cannot set new nonzero locations into this matrix, that will generate an error. 3219 3220 The i and j indices are 0 based 3221 3222 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3223 3224 @*/ 3225 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3226 { 3227 PetscErrorCode ierr; 3228 PetscInt ii; 3229 Mat_SeqBAIJ *baij; 3230 3231 PetscFunctionBegin; 3232 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3233 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3234 3235 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3236 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3237 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3238 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3239 baij = (Mat_SeqBAIJ*)(*mat)->data; 3240 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3241 3242 baij->i = i; 3243 baij->j = j; 3244 baij->a = a; 3245 baij->singlemalloc = PETSC_FALSE; 3246 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3247 baij->free_a = PETSC_FALSE; 3248 baij->free_ij = PETSC_FALSE; 3249 3250 for (ii=0; ii<m; ii++) { 3251 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3252 #if defined(PETSC_USE_DEBUG) 3253 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3254 #endif 3255 } 3256 #if defined(PETSC_USE_DEBUG) 3257 for (ii=0; ii<baij->i[m]; ii++) { 3258 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3259 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3260 } 3261 #endif 3262 3263 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267