1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "petscblaslapack.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@ 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr,(*f)(Mat); 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 29 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 32 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 33 if (f) { 34 ierr = (*f)(mat);CHKERRQ(ierr); 35 } else { 36 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 EXTERN_C_BEGIN 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 45 { 46 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 47 PetscErrorCode ierr; 48 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5]; 49 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25]; 50 PetscReal shift = 0.0; 51 52 PetscFunctionBegin; 53 if (a->idiagvalid) PetscFunctionReturn(0); 54 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 55 diag_offset = a->diag; 56 if (!a->idiag) { 57 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 58 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 default: 129 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"not supported for block size %D",bs); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 138 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 PetscScalar *x,x1,s1; 142 const PetscScalar *b; 143 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 144 PetscErrorCode ierr; 145 PetscInt m = a->mbs,i,i2,nz,j; 146 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 147 148 PetscFunctionBegin; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 its = its*lits; 151 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 152 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 153 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 154 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 155 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 156 157 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 158 159 diag = a->diag; 160 idiag = a->idiag; 161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 162 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 163 164 if (flag & SOR_ZERO_INITIAL_GUESS) { 165 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 166 x[0] = b[0]*idiag[0]; 167 i2 = 1; 168 idiag += 1; 169 for (i=1; i<m; i++) { 170 v = aa + ai[i]; 171 vi = aj + ai[i]; 172 nz = diag[i] - ai[i]; 173 s1 = b[i2]; 174 for (j=0; j<nz; j++) { 175 s1 -= v[j]*x[vi[j]]; 176 } 177 x[i2] = idiag[0]*s1; 178 idiag += 1; 179 i2 += 1; 180 } 181 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 182 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 183 } 184 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 185 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 186 i2 = 0; 187 mdiag = a->idiag+a->mbs; 188 for (i=0; i<m; i++) { 189 x1 = x[i2]; 190 x[i2] = mdiag[0]*x1; 191 mdiag += 1; 192 i2 += 1; 193 } 194 ierr = PetscLogFlops(m);CHKERRQ(ierr); 195 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 196 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 197 } 198 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 199 idiag = a->idiag+a->mbs - 1; 200 i2 = m - 1; 201 x1 = x[i2]; 202 x[i2] = idiag[0]*x1; 203 idiag -= 1; 204 i2 -= 1; 205 for (i=m-2; i>=0; i--) { 206 v = aa + (diag[i]+1); 207 vi = aj + diag[i] + 1; 208 nz = ai[i+1] - diag[i] - 1; 209 s1 = x[i2]; 210 for (j=0; j<nz; j++) { 211 s1 -= v[j]*x[vi[j]]; 212 } 213 x[i2] = idiag[0]*s1; 214 idiag -= 1; 215 i2 -= 1; 216 } 217 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 218 } 219 } else { 220 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 221 } 222 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 223 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 #undef __FUNCT__ 228 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 229 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 230 { 231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 232 PetscScalar *x,x1,x2,s1,s2; 233 const PetscScalar *b; 234 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 235 PetscErrorCode ierr; 236 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 237 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 238 239 PetscFunctionBegin; 240 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 241 its = its*lits; 242 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 243 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 244 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 245 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 246 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 247 248 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 249 250 diag = a->diag; 251 idiag = a->idiag; 252 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 253 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 254 255 if (flag & SOR_ZERO_INITIAL_GUESS) { 256 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 257 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 258 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 259 i2 = 2; 260 idiag += 4; 261 for (i=1; i<m; i++) { 262 v = aa + 4*ai[i]; 263 vi = aj + ai[i]; 264 nz = diag[i] - ai[i]; 265 s1 = b[i2]; s2 = b[i2+1]; 266 for (j=0; j<nz; j++) { 267 idx = 2*vi[j]; 268 it = 4*j; 269 x1 = x[idx]; x2 = x[1+idx]; 270 s1 -= v[it]*x1 + v[it+2]*x2; 271 s2 -= v[it+1]*x1 + v[it+3]*x2; 272 } 273 x[i2] = idiag[0]*s1 + idiag[2]*s2; 274 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 275 idiag += 4; 276 i2 += 2; 277 } 278 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 279 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 280 } 281 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 282 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 283 i2 = 0; 284 mdiag = a->idiag+4*a->mbs; 285 for (i=0; i<m; i++) { 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 288 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 289 mdiag += 4; 290 i2 += 2; 291 } 292 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 293 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 294 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 295 } 296 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 297 idiag = a->idiag+4*a->mbs - 4; 298 i2 = 2*m - 2; 299 x1 = x[i2]; x2 = x[i2+1]; 300 x[i2] = idiag[0]*x1 + idiag[2]*x2; 301 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 302 idiag -= 4; 303 i2 -= 2; 304 for (i=m-2; i>=0; i--) { 305 v = aa + 4*(diag[i]+1); 306 vi = aj + diag[i] + 1; 307 nz = ai[i+1] - diag[i] - 1; 308 s1 = x[i2]; s2 = x[i2+1]; 309 for (j=0; j<nz; j++) { 310 idx = 2*vi[j]; 311 it = 4*j; 312 x1 = x[idx]; x2 = x[1+idx]; 313 s1 -= v[it]*x1 + v[it+2]*x2; 314 s2 -= v[it+1]*x1 + v[it+3]*x2; 315 } 316 x[i2] = idiag[0]*s1 + idiag[2]*s2; 317 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 318 idiag -= 4; 319 i2 -= 2; 320 } 321 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 322 } 323 } else { 324 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 325 } 326 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 327 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 333 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 334 { 335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 336 PetscScalar *x,x1,x2,x3,s1,s2,s3; 337 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 338 const PetscScalar *b; 339 PetscErrorCode ierr; 340 PetscInt m = a->mbs,i,i2,nz,idx; 341 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 342 343 PetscFunctionBegin; 344 its = its*lits; 345 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 346 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 347 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 348 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 349 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 350 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 351 352 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 353 354 diag = a->diag; 355 idiag = a->idiag; 356 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 358 359 if (flag & SOR_ZERO_INITIAL_GUESS) { 360 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 361 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 362 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 363 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 364 i2 = 3; 365 idiag += 9; 366 for (i=1; i<m; i++) { 367 v = aa + 9*ai[i]; 368 vi = aj + ai[i]; 369 nz = diag[i] - ai[i]; 370 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 371 while (nz--) { 372 idx = 3*(*vi++); 373 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 374 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 375 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 376 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 377 v += 9; 378 } 379 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 380 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 381 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 382 idiag += 9; 383 i2 += 3; 384 } 385 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 386 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 387 } 388 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 389 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 390 i2 = 0; 391 mdiag = a->idiag+9*a->mbs; 392 for (i=0; i<m; i++) { 393 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 394 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 395 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 396 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 397 mdiag += 9; 398 i2 += 3; 399 } 400 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 401 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 402 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 403 } 404 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 405 idiag = a->idiag+9*a->mbs - 9; 406 i2 = 3*m - 3; 407 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 408 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 409 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 410 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 411 idiag -= 9; 412 i2 -= 3; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 9*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 418 while (nz--) { 419 idx = 3*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 421 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 422 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 423 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 424 v += 9; 425 } 426 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 427 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 428 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 429 idiag -= 9; 430 i2 -= 3; 431 } 432 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 433 } 434 } else { 435 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 436 } 437 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 438 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 444 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 445 { 446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 447 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 448 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 449 const PetscScalar *b; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 461 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 476 i2 = 4; 477 idiag += 16; 478 for (i=1; i<m; i++) { 479 v = aa + 16*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 483 while (nz--) { 484 idx = 4*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 486 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 487 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 488 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 489 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 490 v += 16; 491 } 492 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 493 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 494 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 495 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 496 idiag += 16; 497 i2 += 4; 498 } 499 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 500 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 501 } 502 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 503 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 504 i2 = 0; 505 mdiag = a->idiag+16*a->mbs; 506 for (i=0; i<m; i++) { 507 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 508 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 509 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 510 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 511 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 512 mdiag += 16; 513 i2 += 4; 514 } 515 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 516 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 517 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 518 } 519 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 520 idiag = a->idiag+16*a->mbs - 16; 521 i2 = 4*m - 4; 522 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 523 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 524 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 525 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 526 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 527 idiag -= 16; 528 i2 -= 4; 529 for (i=m-2; i>=0; i--) { 530 v = aa + 16*(diag[i]+1); 531 vi = aj + diag[i] + 1; 532 nz = ai[i+1] - diag[i] - 1; 533 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 534 while (nz--) { 535 idx = 4*(*vi++); 536 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 537 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 538 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 539 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 540 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 541 v += 16; 542 } 543 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 544 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 545 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 546 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 547 idiag -= 16; 548 i2 -= 4; 549 } 550 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 551 } 552 } else { 553 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 554 } 555 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 556 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 562 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 563 { 564 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 565 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 566 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 567 const PetscScalar *b; 568 PetscErrorCode ierr; 569 PetscInt m = a->mbs,i,i2,nz,idx; 570 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 571 572 PetscFunctionBegin; 573 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 574 its = its*lits; 575 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 576 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 577 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 578 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 579 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 580 581 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 582 583 diag = a->diag; 584 idiag = a->idiag; 585 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 586 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 587 588 if (flag & SOR_ZERO_INITIAL_GUESS) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 590 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 591 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 592 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 593 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 594 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 595 i2 = 5; 596 idiag += 25; 597 for (i=1; i<m; i++) { 598 v = aa + 25*ai[i]; 599 vi = aj + ai[i]; 600 nz = diag[i] - ai[i]; 601 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 602 while (nz--) { 603 idx = 5*(*vi++); 604 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 605 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 606 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 607 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 608 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 609 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 610 v += 25; 611 } 612 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 613 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 614 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 615 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 616 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 617 idiag += 25; 618 i2 += 5; 619 } 620 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 621 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 622 } 623 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 624 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 625 i2 = 0; 626 mdiag = a->idiag+25*a->mbs; 627 for (i=0; i<m; i++) { 628 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 629 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 630 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 631 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 632 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 633 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 634 mdiag += 25; 635 i2 += 5; 636 } 637 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 638 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 639 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 640 } 641 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 642 idiag = a->idiag+25*a->mbs - 25; 643 i2 = 5*m - 5; 644 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 645 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 646 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 647 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 648 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 649 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 650 idiag -= 25; 651 i2 -= 5; 652 for (i=m-2; i>=0; i--) { 653 v = aa + 25*(diag[i]+1); 654 vi = aj + diag[i] + 1; 655 nz = ai[i+1] - diag[i] - 1; 656 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 657 while (nz--) { 658 idx = 5*(*vi++); 659 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 660 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 661 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 662 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 663 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 664 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 665 v += 25; 666 } 667 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 668 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 669 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 670 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 671 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 672 idiag -= 25; 673 i2 -= 5; 674 } 675 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 676 } 677 } else { 678 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 679 } 680 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 681 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 687 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 691 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 692 const PetscScalar *b; 693 PetscErrorCode ierr; 694 PetscInt m = a->mbs,i,i2,nz,idx; 695 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 696 697 PetscFunctionBegin; 698 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 699 its = its*lits; 700 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 701 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 702 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 703 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 704 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 705 706 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 707 708 diag = a->diag; 709 idiag = a->idiag; 710 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 711 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 712 713 if (flag & SOR_ZERO_INITIAL_GUESS) { 714 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 715 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 716 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 717 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 718 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 719 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 720 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 721 i2 = 6; 722 idiag += 36; 723 for (i=1; i<m; i++) { 724 v = aa + 36*ai[i]; 725 vi = aj + ai[i]; 726 nz = diag[i] - ai[i]; 727 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 728 while (nz--) { 729 idx = 6*(*vi++); 730 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 731 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 732 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 733 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 734 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 735 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 736 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 737 v += 36; 738 } 739 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 740 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 741 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 742 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 743 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 744 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 745 idiag += 36; 746 i2 += 6; 747 } 748 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 749 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 750 } 751 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 752 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 753 i2 = 0; 754 mdiag = a->idiag+36*a->mbs; 755 for (i=0; i<m; i++) { 756 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 757 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 758 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 759 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 760 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 761 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 762 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 763 mdiag += 36; 764 i2 += 6; 765 } 766 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 767 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 768 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 769 } 770 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 771 idiag = a->idiag+36*a->mbs - 36; 772 i2 = 6*m - 6; 773 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 774 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 775 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 776 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 777 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 778 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 779 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 780 idiag -= 36; 781 i2 -= 6; 782 for (i=m-2; i>=0; i--) { 783 v = aa + 36*(diag[i]+1); 784 vi = aj + diag[i] + 1; 785 nz = ai[i+1] - diag[i] - 1; 786 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 787 while (nz--) { 788 idx = 6*(*vi++); 789 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 790 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 791 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 792 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 793 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 794 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 795 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 796 v += 36; 797 } 798 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 799 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 800 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 801 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 802 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 803 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 804 idiag -= 36; 805 i2 -= 6; 806 } 807 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 808 } 809 } else { 810 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 811 } 812 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 813 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 819 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 820 { 821 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 822 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 823 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 824 const PetscScalar *b; 825 PetscErrorCode ierr; 826 PetscInt m = a->mbs,i,i2,nz,idx; 827 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 828 829 PetscFunctionBegin; 830 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 831 its = its*lits; 832 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 833 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 834 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 835 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 836 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 837 838 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 839 840 diag = a->diag; 841 idiag = a->idiag; 842 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 843 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 844 845 if (flag & SOR_ZERO_INITIAL_GUESS) { 846 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 847 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 848 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 849 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 850 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 851 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 852 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 853 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 854 i2 = 7; 855 idiag += 49; 856 for (i=1; i<m; i++) { 857 v = aa + 49*ai[i]; 858 vi = aj + ai[i]; 859 nz = diag[i] - ai[i]; 860 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 861 while (nz--) { 862 idx = 7*(*vi++); 863 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 864 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 865 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 866 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 867 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 868 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 869 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 870 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 871 v += 49; 872 } 873 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 874 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 875 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 876 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 877 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 878 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 879 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 880 idiag += 49; 881 i2 += 7; 882 } 883 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 884 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 885 } 886 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 887 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 888 i2 = 0; 889 mdiag = a->idiag+49*a->mbs; 890 for (i=0; i<m; i++) { 891 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 892 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 893 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 894 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 895 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 896 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 897 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 898 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 899 mdiag += 36; 900 i2 += 6; 901 } 902 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 903 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 904 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 905 } 906 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 907 idiag = a->idiag+49*a->mbs - 49; 908 i2 = 7*m - 7; 909 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 910 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 911 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 912 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 913 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 914 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 915 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 916 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 917 idiag -= 49; 918 i2 -= 7; 919 for (i=m-2; i>=0; i--) { 920 v = aa + 49*(diag[i]+1); 921 vi = aj + diag[i] + 1; 922 nz = ai[i+1] - diag[i] - 1; 923 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 924 while (nz--) { 925 idx = 7*(*vi++); 926 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 927 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 928 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 929 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 930 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 931 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 932 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 933 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 934 v += 49; 935 } 936 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 937 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 938 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 939 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 940 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 941 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 942 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 943 idiag -= 49; 944 i2 -= 7; 945 } 946 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 947 } 948 } else { 949 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 950 } 951 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 952 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 /* 957 Special version for direct calls from Fortran (Used in PETSc-fun3d) 958 */ 959 #if defined(PETSC_HAVE_FORTRAN_CAPS) 960 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 961 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 962 #define matsetvaluesblocked4_ matsetvaluesblocked4 963 #endif 964 965 EXTERN_C_BEGIN 966 #undef __FUNCT__ 967 #define __FUNCT__ "matsetvaluesblocked4_" 968 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 969 { 970 Mat A = *AA; 971 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 972 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 973 PetscInt *ai=a->i,*ailen=a->ilen; 974 PetscInt *aj=a->j,stepval,lastcol = -1; 975 const PetscScalar *value = v; 976 MatScalar *ap,*aa = a->a,*bap; 977 978 PetscFunctionBegin; 979 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 980 stepval = (n-1)*4; 981 for (k=0; k<m; k++) { /* loop over added rows */ 982 row = im[k]; 983 rp = aj + ai[row]; 984 ap = aa + 16*ai[row]; 985 nrow = ailen[row]; 986 low = 0; 987 high = nrow; 988 for (l=0; l<n; l++) { /* loop over added columns */ 989 col = in[l]; 990 if (col <= lastcol) low = 0; else high = nrow; 991 lastcol = col; 992 value = v + k*(stepval+4 + l)*4; 993 while (high-low > 7) { 994 t = (low+high)/2; 995 if (rp[t] > col) high = t; 996 else low = t; 997 } 998 for (i=low; i<high; i++) { 999 if (rp[i] > col) break; 1000 if (rp[i] == col) { 1001 bap = ap + 16*i; 1002 for (ii=0; ii<4; ii++,value+=stepval) { 1003 for (jj=ii; jj<16; jj+=4) { 1004 bap[jj] += *value++; 1005 } 1006 } 1007 goto noinsert2; 1008 } 1009 } 1010 N = nrow++ - 1; 1011 high++; /* added new column index thus must search to one higher than before */ 1012 /* shift up all the later entries in this row */ 1013 for (ii=N; ii>=i; ii--) { 1014 rp[ii+1] = rp[ii]; 1015 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1016 } 1017 if (N >= i) { 1018 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1019 } 1020 rp[i] = col; 1021 bap = ap + 16*i; 1022 for (ii=0; ii<4; ii++,value+=stepval) { 1023 for (jj=ii; jj<16; jj+=4) { 1024 bap[jj] = *value++; 1025 } 1026 } 1027 noinsert2:; 1028 low = i; 1029 } 1030 ailen[row] = nrow; 1031 } 1032 PetscFunctionReturnVoid(); 1033 } 1034 EXTERN_C_END 1035 1036 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1037 #define matsetvalues4_ MATSETVALUES4 1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1039 #define matsetvalues4_ matsetvalues4 1040 #endif 1041 1042 EXTERN_C_BEGIN 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatSetValues4_" 1045 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1046 { 1047 Mat A = *AA; 1048 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1049 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1050 PetscInt *ai=a->i,*ailen=a->ilen; 1051 PetscInt *aj=a->j,brow,bcol; 1052 PetscInt ridx,cidx,lastcol = -1; 1053 MatScalar *ap,value,*aa=a->a,*bap; 1054 1055 PetscFunctionBegin; 1056 for (k=0; k<m; k++) { /* loop over added rows */ 1057 row = im[k]; brow = row/4; 1058 rp = aj + ai[brow]; 1059 ap = aa + 16*ai[brow]; 1060 nrow = ailen[brow]; 1061 low = 0; 1062 high = nrow; 1063 for (l=0; l<n; l++) { /* loop over added columns */ 1064 col = in[l]; bcol = col/4; 1065 ridx = row % 4; cidx = col % 4; 1066 value = v[l + k*n]; 1067 if (col <= lastcol) low = 0; else high = nrow; 1068 lastcol = col; 1069 while (high-low > 7) { 1070 t = (low+high)/2; 1071 if (rp[t] > bcol) high = t; 1072 else low = t; 1073 } 1074 for (i=low; i<high; i++) { 1075 if (rp[i] > bcol) break; 1076 if (rp[i] == bcol) { 1077 bap = ap + 16*i + 4*cidx + ridx; 1078 *bap += value; 1079 goto noinsert1; 1080 } 1081 } 1082 N = nrow++ - 1; 1083 high++; /* added new column thus must search to one higher than before */ 1084 /* shift up all the later entries in this row */ 1085 for (ii=N; ii>=i; ii--) { 1086 rp[ii+1] = rp[ii]; 1087 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1088 } 1089 if (N>=i) { 1090 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1091 } 1092 rp[i] = bcol; 1093 ap[16*i + 4*cidx + ridx] = value; 1094 noinsert1:; 1095 low = i; 1096 } 1097 ailen[brow] = nrow; 1098 } 1099 PetscFunctionReturnVoid(); 1100 } 1101 EXTERN_C_END 1102 1103 /* 1104 Checks for missing diagonals 1105 */ 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1108 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1109 { 1110 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 PetscInt *diag,*jj = a->j,i; 1113 1114 PetscFunctionBegin; 1115 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1116 *missing = PETSC_FALSE; 1117 if (A->rmap->n > 0 && !jj) { 1118 *missing = PETSC_TRUE; 1119 if (d) *d = 0; 1120 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1121 } else { 1122 diag = a->diag; 1123 for (i=0; i<a->mbs; i++) { 1124 if (jj[diag[i]] != i) { 1125 *missing = PETSC_TRUE; 1126 if (d) *d = i; 1127 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1128 } 1129 } 1130 } 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1136 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1137 { 1138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1139 PetscErrorCode ierr; 1140 PetscInt i,j,m = a->mbs; 1141 1142 PetscFunctionBegin; 1143 if (!a->diag) { 1144 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1145 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1146 a->free_diag = PETSC_TRUE; 1147 } 1148 for (i=0; i<m; i++) { 1149 a->diag[i] = a->i[i+1]; 1150 for (j=a->i[i]; j<a->i[i+1]; j++) { 1151 if (a->j[j] == i) { 1152 a->diag[i] = j; 1153 break; 1154 } 1155 } 1156 } 1157 PetscFunctionReturn(0); 1158 } 1159 1160 1161 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1165 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1166 { 1167 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1168 PetscErrorCode ierr; 1169 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1170 PetscInt *tia, *tja; 1171 1172 PetscFunctionBegin; 1173 *nn = n; 1174 if (!ia) PetscFunctionReturn(0); 1175 if (symmetric) { 1176 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1177 nz = tia[n]; 1178 } else { 1179 tia = a->i; tja = a->j; 1180 } 1181 1182 if (!blockcompressed && bs > 1) { 1183 (*nn) *= bs; 1184 nbs = bs; 1185 /* malloc & create the natural set of indices */ 1186 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1187 if (n) { 1188 (*ia)[0] = 0; 1189 for (j=1; j<bs; j++) { 1190 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1191 } 1192 } 1193 1194 for (i=1; i<n; i++) { 1195 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1196 for (j=1; j<bs; j++) { 1197 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1198 } 1199 } 1200 if (n) { 1201 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1202 } 1203 1204 if (ja) { 1205 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1206 cnt = 0; 1207 for (i=0; i<n; i++) { 1208 for (j=0; j<bs; j++) { 1209 for (k=tia[i]; k<tia[i+1]; k++) { 1210 for (l=0; l<bs; l++) { 1211 (*ja)[cnt++] = bs*tja[k] + l; 1212 } 1213 } 1214 } 1215 } 1216 } 1217 1218 n *= bs; 1219 nz *= bs*bs; 1220 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1221 ierr = PetscFree(tia);CHKERRQ(ierr); 1222 ierr = PetscFree(tja);CHKERRQ(ierr); 1223 } 1224 } else if (oshift == 1) { 1225 if (symmetric) { 1226 PetscInt nz = tia[A->rmap->n/bs]; 1227 /* add 1 to i and j indices */ 1228 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1229 *ia = tia; 1230 if (ja) { 1231 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1232 *ja = tja; 1233 } 1234 } else { 1235 PetscInt nz = a->i[A->rmap->n/bs]; 1236 /* malloc space and add 1 to i and j indices */ 1237 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1238 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1239 if (ja) { 1240 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1241 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1242 } 1243 } 1244 } else { 1245 *ia = tia; 1246 if (ja) *ja = tja; 1247 } 1248 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1254 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1255 { 1256 PetscErrorCode ierr; 1257 1258 PetscFunctionBegin; 1259 if (!ia) PetscFunctionReturn(0); 1260 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1261 ierr = PetscFree(*ia);CHKERRQ(ierr); 1262 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1263 } 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1269 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1270 { 1271 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 #if defined(PETSC_USE_LOG) 1276 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1277 #endif 1278 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1279 if (a->row) { 1280 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1281 } 1282 if (a->col) { 1283 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1284 } 1285 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1286 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1287 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1288 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1289 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1290 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1291 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1292 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1293 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1294 1295 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1296 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1297 ierr = PetscFree(a);CHKERRQ(ierr); 1298 1299 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1313 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1314 { 1315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 switch (op) { 1320 case MAT_ROW_ORIENTED: 1321 a->roworiented = flg; 1322 break; 1323 case MAT_KEEP_NONZERO_PATTERN: 1324 a->keepnonzeropattern = flg; 1325 break; 1326 case MAT_NEW_NONZERO_LOCATIONS: 1327 a->nonew = (flg ? 0 : 1); 1328 break; 1329 case MAT_NEW_NONZERO_LOCATION_ERR: 1330 a->nonew = (flg ? -1 : 0); 1331 break; 1332 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1333 a->nonew = (flg ? -2 : 0); 1334 break; 1335 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1336 a->nounused = (flg ? -1 : 0); 1337 break; 1338 case MAT_NEW_DIAGONALS: 1339 case MAT_IGNORE_OFF_PROC_ENTRIES: 1340 case MAT_USE_HASH_TABLE: 1341 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1342 break; 1343 case MAT_SYMMETRIC: 1344 case MAT_STRUCTURALLY_SYMMETRIC: 1345 case MAT_HERMITIAN: 1346 case MAT_SYMMETRY_ETERNAL: 1347 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1348 break; 1349 default: 1350 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1357 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1358 { 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1360 PetscErrorCode ierr; 1361 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1362 MatScalar *aa,*aa_i; 1363 PetscScalar *v_i; 1364 1365 PetscFunctionBegin; 1366 bs = A->rmap->bs; 1367 ai = a->i; 1368 aj = a->j; 1369 aa = a->a; 1370 bs2 = a->bs2; 1371 1372 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1373 1374 bn = row/bs; /* Block number */ 1375 bp = row % bs; /* Block Position */ 1376 M = ai[bn+1] - ai[bn]; 1377 *nz = bs*M; 1378 1379 if (v) { 1380 *v = 0; 1381 if (*nz) { 1382 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1383 for (i=0; i<M; i++) { /* for each block in the block row */ 1384 v_i = *v + i*bs; 1385 aa_i = aa + bs2*(ai[bn] + i); 1386 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1387 } 1388 } 1389 } 1390 1391 if (idx) { 1392 *idx = 0; 1393 if (*nz) { 1394 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1395 for (i=0; i<M; i++) { /* for each block in the block row */ 1396 idx_i = *idx + i*bs; 1397 itmp = bs*aj[ai[bn] + i]; 1398 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1399 } 1400 } 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1407 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1408 { 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1413 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1414 PetscFunctionReturn(0); 1415 } 1416 1417 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1421 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1422 { 1423 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1424 Mat C; 1425 PetscErrorCode ierr; 1426 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1427 PetscInt *rows,*cols,bs2=a->bs2; 1428 MatScalar *array; 1429 1430 PetscFunctionBegin; 1431 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1432 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1433 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1434 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1435 1436 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1437 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1438 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1439 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1440 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1441 ierr = PetscFree(col);CHKERRQ(ierr); 1442 } else { 1443 C = *B; 1444 } 1445 1446 array = a->a; 1447 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1448 for (i=0; i<mbs; i++) { 1449 cols[0] = i*bs; 1450 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1451 len = ai[i+1] - ai[i]; 1452 for (j=0; j<len; j++) { 1453 rows[0] = (*aj++)*bs; 1454 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1455 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1456 array += bs2; 1457 } 1458 } 1459 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1460 1461 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1462 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 1464 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1465 *B = C; 1466 } else { 1467 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1474 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1475 { 1476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1479 int fd; 1480 PetscScalar *aa; 1481 FILE *file; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1485 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1486 col_lens[0] = MAT_FILE_CLASSID; 1487 1488 col_lens[1] = A->rmap->N; 1489 col_lens[2] = A->cmap->n; 1490 col_lens[3] = a->nz*bs2; 1491 1492 /* store lengths of each row and write (including header) to file */ 1493 count = 0; 1494 for (i=0; i<a->mbs; i++) { 1495 for (j=0; j<bs; j++) { 1496 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1497 } 1498 } 1499 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1500 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1501 1502 /* store column indices (zero start index) */ 1503 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1504 count = 0; 1505 for (i=0; i<a->mbs; i++) { 1506 for (j=0; j<bs; j++) { 1507 for (k=a->i[i]; k<a->i[i+1]; k++) { 1508 for (l=0; l<bs; l++) { 1509 jj[count++] = bs*a->j[k] + l; 1510 } 1511 } 1512 } 1513 } 1514 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1515 ierr = PetscFree(jj);CHKERRQ(ierr); 1516 1517 /* store nonzero values */ 1518 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1519 count = 0; 1520 for (i=0; i<a->mbs; i++) { 1521 for (j=0; j<bs; j++) { 1522 for (k=a->i[i]; k<a->i[i+1]; k++) { 1523 for (l=0; l<bs; l++) { 1524 aa[count++] = a->a[bs2*k + l*bs + j]; 1525 } 1526 } 1527 } 1528 } 1529 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1530 ierr = PetscFree(aa);CHKERRQ(ierr); 1531 1532 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1533 if (file) { 1534 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1541 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1542 { 1543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1544 PetscErrorCode ierr; 1545 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1546 PetscViewerFormat format; 1547 1548 PetscFunctionBegin; 1549 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1550 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1551 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1552 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1553 Mat aij; 1554 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1555 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1556 ierr = MatDestroy(aij);CHKERRQ(ierr); 1557 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1558 PetscFunctionReturn(0); 1559 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1560 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1561 for (i=0; i<a->mbs; i++) { 1562 for (j=0; j<bs; j++) { 1563 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1564 for (k=a->i[i]; k<a->i[i+1]; k++) { 1565 for (l=0; l<bs; l++) { 1566 #if defined(PETSC_USE_COMPLEX) 1567 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1568 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1569 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1570 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1571 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1572 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1573 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1574 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1575 } 1576 #else 1577 if (a->a[bs2*k + l*bs + j] != 0.0) { 1578 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1579 } 1580 #endif 1581 } 1582 } 1583 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1584 } 1585 } 1586 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1587 } else { 1588 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1589 for (i=0; i<a->mbs; i++) { 1590 for (j=0; j<bs; j++) { 1591 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1592 for (k=a->i[i]; k<a->i[i+1]; k++) { 1593 for (l=0; l<bs; l++) { 1594 #if defined(PETSC_USE_COMPLEX) 1595 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1596 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1597 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1598 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1599 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1600 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1601 } else { 1602 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1603 } 1604 #else 1605 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1606 #endif 1607 } 1608 } 1609 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1610 } 1611 } 1612 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1613 } 1614 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1620 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1621 { 1622 Mat A = (Mat) Aa; 1623 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1624 PetscErrorCode ierr; 1625 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1626 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1627 MatScalar *aa; 1628 PetscViewer viewer; 1629 1630 PetscFunctionBegin; 1631 1632 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1633 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1634 1635 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1636 1637 /* loop over matrix elements drawing boxes */ 1638 color = PETSC_DRAW_BLUE; 1639 for (i=0,row=0; i<mbs; i++,row+=bs) { 1640 for (j=a->i[i]; j<a->i[i+1]; j++) { 1641 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1642 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1643 aa = a->a + j*bs2; 1644 for (k=0; k<bs; k++) { 1645 for (l=0; l<bs; l++) { 1646 if (PetscRealPart(*aa++) >= 0.) continue; 1647 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1648 } 1649 } 1650 } 1651 } 1652 color = PETSC_DRAW_CYAN; 1653 for (i=0,row=0; i<mbs; i++,row+=bs) { 1654 for (j=a->i[i]; j<a->i[i+1]; j++) { 1655 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1656 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1657 aa = a->a + j*bs2; 1658 for (k=0; k<bs; k++) { 1659 for (l=0; l<bs; l++) { 1660 if (PetscRealPart(*aa++) != 0.) continue; 1661 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1662 } 1663 } 1664 } 1665 } 1666 1667 color = PETSC_DRAW_RED; 1668 for (i=0,row=0; i<mbs; i++,row+=bs) { 1669 for (j=a->i[i]; j<a->i[i+1]; j++) { 1670 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1671 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1672 aa = a->a + j*bs2; 1673 for (k=0; k<bs; k++) { 1674 for (l=0; l<bs; l++) { 1675 if (PetscRealPart(*aa++) <= 0.) continue; 1676 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1686 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1687 { 1688 PetscErrorCode ierr; 1689 PetscReal xl,yl,xr,yr,w,h; 1690 PetscDraw draw; 1691 PetscTruth isnull; 1692 1693 PetscFunctionBegin; 1694 1695 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1696 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1697 1698 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1699 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1700 xr += w; yr += h; xl = -w; yl = -h; 1701 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1702 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1703 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatView_SeqBAIJ" 1709 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1710 { 1711 PetscErrorCode ierr; 1712 PetscTruth iascii,isbinary,isdraw; 1713 1714 PetscFunctionBegin; 1715 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1717 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1718 if (iascii){ 1719 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1720 } else if (isbinary) { 1721 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1722 } else if (isdraw) { 1723 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1724 } else { 1725 Mat B; 1726 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1727 ierr = MatView(B,viewer);CHKERRQ(ierr); 1728 ierr = MatDestroy(B);CHKERRQ(ierr); 1729 } 1730 PetscFunctionReturn(0); 1731 } 1732 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1736 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1737 { 1738 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1739 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1740 PetscInt *ai = a->i,*ailen = a->ilen; 1741 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1742 MatScalar *ap,*aa = a->a; 1743 1744 PetscFunctionBegin; 1745 for (k=0; k<m; k++) { /* loop over rows */ 1746 row = im[k]; brow = row/bs; 1747 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1748 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1749 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1750 nrow = ailen[brow]; 1751 for (l=0; l<n; l++) { /* loop over columns */ 1752 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1753 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1754 col = in[l] ; 1755 bcol = col/bs; 1756 cidx = col%bs; 1757 ridx = row%bs; 1758 high = nrow; 1759 low = 0; /* assume unsorted */ 1760 while (high-low > 5) { 1761 t = (low+high)/2; 1762 if (rp[t] > bcol) high = t; 1763 else low = t; 1764 } 1765 for (i=low; i<high; i++) { 1766 if (rp[i] > bcol) break; 1767 if (rp[i] == bcol) { 1768 *v++ = ap[bs2*i+bs*cidx+ridx]; 1769 goto finished; 1770 } 1771 } 1772 *v++ = 0.0; 1773 finished:; 1774 } 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1782 { 1783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1784 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1785 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1786 PetscErrorCode ierr; 1787 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1788 PetscTruth roworiented=a->roworiented; 1789 const PetscScalar *value = v; 1790 MatScalar *ap,*aa = a->a,*bap; 1791 1792 PetscFunctionBegin; 1793 if (roworiented) { 1794 stepval = (n-1)*bs; 1795 } else { 1796 stepval = (m-1)*bs; 1797 } 1798 for (k=0; k<m; k++) { /* loop over added rows */ 1799 row = im[k]; 1800 if (row < 0) continue; 1801 #if defined(PETSC_USE_DEBUG) 1802 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1803 #endif 1804 rp = aj + ai[row]; 1805 ap = aa + bs2*ai[row]; 1806 rmax = imax[row]; 1807 nrow = ailen[row]; 1808 low = 0; 1809 high = nrow; 1810 for (l=0; l<n; l++) { /* loop over added columns */ 1811 if (in[l] < 0) continue; 1812 #if defined(PETSC_USE_DEBUG) 1813 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1814 #endif 1815 col = in[l]; 1816 if (roworiented) { 1817 value = v + k*(stepval+bs)*bs + l*bs; 1818 } else { 1819 value = v + l*(stepval+bs)*bs + k*bs; 1820 } 1821 if (col <= lastcol) low = 0; else high = nrow; 1822 lastcol = col; 1823 while (high-low > 7) { 1824 t = (low+high)/2; 1825 if (rp[t] > col) high = t; 1826 else low = t; 1827 } 1828 for (i=low; i<high; i++) { 1829 if (rp[i] > col) break; 1830 if (rp[i] == col) { 1831 bap = ap + bs2*i; 1832 if (roworiented) { 1833 if (is == ADD_VALUES) { 1834 for (ii=0; ii<bs; ii++,value+=stepval) { 1835 for (jj=ii; jj<bs2; jj+=bs) { 1836 bap[jj] += *value++; 1837 } 1838 } 1839 } else { 1840 for (ii=0; ii<bs; ii++,value+=stepval) { 1841 for (jj=ii; jj<bs2; jj+=bs) { 1842 bap[jj] = *value++; 1843 } 1844 } 1845 } 1846 } else { 1847 if (is == ADD_VALUES) { 1848 for (ii=0; ii<bs; ii++,value+=stepval) { 1849 for (jj=0; jj<bs; jj++) { 1850 *bap++ += *value++; 1851 } 1852 } 1853 } else { 1854 for (ii=0; ii<bs; ii++,value+=stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 *bap++ = *value++; 1857 } 1858 } 1859 } 1860 } 1861 goto noinsert2; 1862 } 1863 } 1864 if (nonew == 1) goto noinsert2; 1865 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1866 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1867 N = nrow++ - 1; high++; 1868 /* shift up all the later entries in this row */ 1869 for (ii=N; ii>=i; ii--) { 1870 rp[ii+1] = rp[ii]; 1871 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1872 } 1873 if (N >= i) { 1874 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1875 } 1876 rp[i] = col; 1877 bap = ap + bs2*i; 1878 if (roworiented) { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=ii; jj<bs2; jj+=bs) { 1881 bap[jj] = *value++; 1882 } 1883 } 1884 } else { 1885 for (ii=0; ii<bs; ii++,value+=stepval) { 1886 for (jj=0; jj<bs; jj++) { 1887 *bap++ = *value++; 1888 } 1889 } 1890 } 1891 noinsert2:; 1892 low = i; 1893 } 1894 ailen[row] = nrow; 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1902 { 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1904 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1905 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1906 PetscErrorCode ierr; 1907 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1908 MatScalar *aa = a->a,*ap; 1909 PetscReal ratio=0.6; 1910 1911 PetscFunctionBegin; 1912 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1913 1914 if (m) rmax = ailen[0]; 1915 for (i=1; i<mbs; i++) { 1916 /* move each row back by the amount of empty slots (fshift) before it*/ 1917 fshift += imax[i-1] - ailen[i-1]; 1918 rmax = PetscMax(rmax,ailen[i]); 1919 if (fshift) { 1920 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1921 N = ailen[i]; 1922 for (j=0; j<N; j++) { 1923 ip[j-fshift] = ip[j]; 1924 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1925 } 1926 } 1927 ai[i] = ai[i-1] + ailen[i-1]; 1928 } 1929 if (mbs) { 1930 fshift += imax[mbs-1] - ailen[mbs-1]; 1931 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1932 } 1933 /* reset ilen and imax for each row */ 1934 for (i=0; i<mbs; i++) { 1935 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1936 } 1937 a->nz = ai[mbs]; 1938 1939 /* diagonals may have moved, so kill the diagonal pointers */ 1940 a->idiagvalid = PETSC_FALSE; 1941 if (fshift && a->diag) { 1942 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1944 a->diag = 0; 1945 } 1946 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1947 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1948 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1949 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1950 A->info.mallocs += a->reallocs; 1951 a->reallocs = 0; 1952 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1953 1954 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1955 if (a->compressedrow.use){ 1956 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1957 } 1958 1959 A->same_nonzero = PETSC_TRUE; 1960 PetscFunctionReturn(0); 1961 } 1962 1963 /* 1964 This function returns an array of flags which indicate the locations of contiguous 1965 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1966 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1967 Assume: sizes should be long enough to hold all the values. 1968 */ 1969 #undef __FUNCT__ 1970 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1971 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1972 { 1973 PetscInt i,j,k,row; 1974 PetscTruth flg; 1975 1976 PetscFunctionBegin; 1977 for (i=0,j=0; i<n; j++) { 1978 row = idx[i]; 1979 if (row%bs!=0) { /* Not the begining of a block */ 1980 sizes[j] = 1; 1981 i++; 1982 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1983 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1984 i++; 1985 } else { /* Begining of the block, so check if the complete block exists */ 1986 flg = PETSC_TRUE; 1987 for (k=1; k<bs; k++) { 1988 if (row+k != idx[i+k]) { /* break in the block */ 1989 flg = PETSC_FALSE; 1990 break; 1991 } 1992 } 1993 if (flg) { /* No break in the bs */ 1994 sizes[j] = bs; 1995 i+= bs; 1996 } else { 1997 sizes[j] = 1; 1998 i++; 1999 } 2000 } 2001 } 2002 *bs_max = j; 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2008 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2009 { 2010 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2011 PetscErrorCode ierr; 2012 PetscInt i,j,k,count,*rows; 2013 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2014 PetscScalar zero = 0.0; 2015 MatScalar *aa; 2016 2017 PetscFunctionBegin; 2018 /* Make a copy of the IS and sort it */ 2019 /* allocate memory for rows,sizes */ 2020 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2021 2022 /* copy IS values to rows, and sort them */ 2023 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2024 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2025 if (baij->keepnonzeropattern) { 2026 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2027 bs_max = is_n; 2028 A->same_nonzero = PETSC_TRUE; 2029 } else { 2030 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2031 A->same_nonzero = PETSC_FALSE; 2032 } 2033 2034 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2035 row = rows[j]; 2036 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2037 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2038 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2039 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2040 if (diag != 0.0) { 2041 if (baij->ilen[row/bs] > 0) { 2042 baij->ilen[row/bs] = 1; 2043 baij->j[baij->i[row/bs]] = row/bs; 2044 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2045 } 2046 /* Now insert all the diagonal values for this bs */ 2047 for (k=0; k<bs; k++) { 2048 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2049 } 2050 } else { /* (diag == 0.0) */ 2051 baij->ilen[row/bs] = 0; 2052 } /* end (diag == 0.0) */ 2053 } else { /* (sizes[i] != bs) */ 2054 #if defined (PETSC_USE_DEBUG) 2055 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2056 #endif 2057 for (k=0; k<count; k++) { 2058 aa[0] = zero; 2059 aa += bs; 2060 } 2061 if (diag != 0.0) { 2062 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2063 } 2064 } 2065 } 2066 2067 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2074 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2075 { 2076 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2077 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2078 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2079 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2080 PetscErrorCode ierr; 2081 PetscInt ridx,cidx,bs2=a->bs2; 2082 PetscTruth roworiented=a->roworiented; 2083 MatScalar *ap,value,*aa=a->a,*bap; 2084 2085 PetscFunctionBegin; 2086 if (v) PetscValidScalarPointer(v,6); 2087 for (k=0; k<m; k++) { /* loop over added rows */ 2088 row = im[k]; 2089 brow = row/bs; 2090 if (row < 0) continue; 2091 #if defined(PETSC_USE_DEBUG) 2092 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2093 #endif 2094 rp = aj + ai[brow]; 2095 ap = aa + bs2*ai[brow]; 2096 rmax = imax[brow]; 2097 nrow = ailen[brow]; 2098 low = 0; 2099 high = nrow; 2100 for (l=0; l<n; l++) { /* loop over added columns */ 2101 if (in[l] < 0) continue; 2102 #if defined(PETSC_USE_DEBUG) 2103 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2104 #endif 2105 col = in[l]; bcol = col/bs; 2106 ridx = row % bs; cidx = col % bs; 2107 if (roworiented) { 2108 value = v[l + k*n]; 2109 } else { 2110 value = v[k + l*m]; 2111 } 2112 if (col <= lastcol) low = 0; else high = nrow; 2113 lastcol = col; 2114 while (high-low > 7) { 2115 t = (low+high)/2; 2116 if (rp[t] > bcol) high = t; 2117 else low = t; 2118 } 2119 for (i=low; i<high; i++) { 2120 if (rp[i] > bcol) break; 2121 if (rp[i] == bcol) { 2122 bap = ap + bs2*i + bs*cidx + ridx; 2123 if (is == ADD_VALUES) *bap += value; 2124 else *bap = value; 2125 goto noinsert1; 2126 } 2127 } 2128 if (nonew == 1) goto noinsert1; 2129 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2130 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2131 N = nrow++ - 1; high++; 2132 /* shift up all the later entries in this row */ 2133 for (ii=N; ii>=i; ii--) { 2134 rp[ii+1] = rp[ii]; 2135 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2136 } 2137 if (N>=i) { 2138 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2139 } 2140 rp[i] = bcol; 2141 ap[bs2*i + bs*cidx + ridx] = value; 2142 a->nz++; 2143 noinsert1:; 2144 low = i; 2145 } 2146 ailen[brow] = nrow; 2147 } 2148 A->same_nonzero = PETSC_FALSE; 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2154 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2155 { 2156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2157 Mat outA; 2158 PetscErrorCode ierr; 2159 PetscTruth row_identity,col_identity; 2160 2161 PetscFunctionBegin; 2162 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2163 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2164 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2165 if (!row_identity || !col_identity) { 2166 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2167 } 2168 2169 outA = inA; 2170 inA->factortype = MAT_FACTOR_LU; 2171 2172 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2173 2174 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2175 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2176 a->row = row; 2177 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2178 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2179 a->col = col; 2180 2181 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2182 if (a->icol) { 2183 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2184 } 2185 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2186 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2187 2188 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2189 if (!a->solve_work) { 2190 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2191 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2192 } 2193 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2194 2195 PetscFunctionReturn(0); 2196 } 2197 2198 EXTERN_C_BEGIN 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2201 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2202 { 2203 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2204 PetscInt i,nz,mbs; 2205 2206 PetscFunctionBegin; 2207 nz = baij->maxnz/baij->bs2; 2208 mbs = baij->mbs; 2209 for (i=0; i<nz; i++) { 2210 baij->j[i] = indices[i]; 2211 } 2212 baij->nz = nz; 2213 for (i=0; i<mbs; i++) { 2214 baij->ilen[i] = baij->imax[i]; 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 EXTERN_C_END 2219 2220 #undef __FUNCT__ 2221 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2222 /*@ 2223 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2224 in the matrix. 2225 2226 Input Parameters: 2227 + mat - the SeqBAIJ matrix 2228 - indices - the column indices 2229 2230 Level: advanced 2231 2232 Notes: 2233 This can be called if you have precomputed the nonzero structure of the 2234 matrix and want to provide it to the matrix object to improve the performance 2235 of the MatSetValues() operation. 2236 2237 You MUST have set the correct numbers of nonzeros per row in the call to 2238 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2239 2240 MUST be called before any calls to MatSetValues(); 2241 2242 @*/ 2243 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2244 { 2245 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2246 2247 PetscFunctionBegin; 2248 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2249 PetscValidPointer(indices,2); 2250 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2251 if (f) { 2252 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2253 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2259 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2260 { 2261 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2262 PetscErrorCode ierr; 2263 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2264 PetscReal atmp; 2265 PetscScalar *x,zero = 0.0; 2266 MatScalar *aa; 2267 PetscInt ncols,brow,krow,kcol; 2268 2269 PetscFunctionBegin; 2270 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2271 bs = A->rmap->bs; 2272 aa = a->a; 2273 ai = a->i; 2274 aj = a->j; 2275 mbs = a->mbs; 2276 2277 ierr = VecSet(v,zero);CHKERRQ(ierr); 2278 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2279 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2280 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2281 for (i=0; i<mbs; i++) { 2282 ncols = ai[1] - ai[0]; ai++; 2283 brow = bs*i; 2284 for (j=0; j<ncols; j++){ 2285 for (kcol=0; kcol<bs; kcol++){ 2286 for (krow=0; krow<bs; krow++){ 2287 atmp = PetscAbsScalar(*aa);aa++; 2288 row = brow + krow; /* row index */ 2289 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2290 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2291 } 2292 } 2293 aj++; 2294 } 2295 } 2296 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatCopy_SeqBAIJ" 2302 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2303 { 2304 PetscErrorCode ierr; 2305 2306 PetscFunctionBegin; 2307 /* If the two matrices have the same copy implementation, use fast copy. */ 2308 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2310 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2311 2312 if (a->i[A->rmap->N] != b->i[B->rmap->N]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2313 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2314 } else { 2315 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2316 } 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2322 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2323 { 2324 PetscErrorCode ierr; 2325 2326 PetscFunctionBegin; 2327 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2328 PetscFunctionReturn(0); 2329 } 2330 2331 #undef __FUNCT__ 2332 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2333 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2334 { 2335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2336 PetscFunctionBegin; 2337 *array = a->a; 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2343 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2344 { 2345 PetscFunctionBegin; 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2351 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2352 { 2353 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2354 PetscErrorCode ierr; 2355 PetscInt i,bs=Y->rmap->bs,j,bs2; 2356 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2357 2358 PetscFunctionBegin; 2359 if (str == SAME_NONZERO_PATTERN) { 2360 PetscScalar alpha = a; 2361 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2362 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2363 if (y->xtoy && y->XtoY != X) { 2364 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2365 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2366 } 2367 if (!y->xtoy) { /* get xtoy */ 2368 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2369 y->XtoY = X; 2370 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2371 } 2372 bs2 = bs*bs; 2373 for (i=0; i<x->nz; i++) { 2374 j = 0; 2375 while (j < bs2){ 2376 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2377 j++; 2378 } 2379 } 2380 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2381 } else { 2382 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2389 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2390 { 2391 PetscInt rbs,cbs; 2392 PetscErrorCode ierr; 2393 2394 PetscFunctionBegin; 2395 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2396 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2397 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2398 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2404 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2405 { 2406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2407 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2408 MatScalar *aa = a->a; 2409 2410 PetscFunctionBegin; 2411 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2417 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2418 { 2419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2420 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2421 MatScalar *aa = a->a; 2422 2423 PetscFunctionBegin; 2424 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2432 /* 2433 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2434 */ 2435 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2436 { 2437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2438 PetscErrorCode ierr; 2439 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2440 PetscInt nz = a->i[m],row,*jj,mr,col; 2441 2442 PetscFunctionBegin; 2443 *nn = n; 2444 if (!ia) PetscFunctionReturn(0); 2445 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2446 else { 2447 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2448 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2449 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2450 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2451 jj = a->j; 2452 for (i=0; i<nz; i++) { 2453 collengths[jj[i]]++; 2454 } 2455 cia[0] = oshift; 2456 for (i=0; i<n; i++) { 2457 cia[i+1] = cia[i] + collengths[i]; 2458 } 2459 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2460 jj = a->j; 2461 for (row=0; row<m; row++) { 2462 mr = a->i[row+1] - a->i[row]; 2463 for (i=0; i<mr; i++) { 2464 col = *jj++; 2465 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2466 } 2467 } 2468 ierr = PetscFree(collengths);CHKERRQ(ierr); 2469 *ia = cia; *ja = cja; 2470 } 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2476 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2477 { 2478 PetscErrorCode ierr; 2479 2480 PetscFunctionBegin; 2481 if (!ia) PetscFunctionReturn(0); 2482 ierr = PetscFree(*ia);CHKERRQ(ierr); 2483 ierr = PetscFree(*ja);CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 #undef __FUNCT__ 2488 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2489 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2490 { 2491 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2492 PetscErrorCode ierr; 2493 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2494 PetscScalar dx,*y,*xx,*w3_array; 2495 PetscScalar *vscale_array; 2496 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2497 Vec w1=coloring->w1,w2=coloring->w2,w3; 2498 void *fctx = coloring->fctx; 2499 PetscTruth flg = PETSC_FALSE; 2500 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2501 Vec x1_tmp; 2502 2503 PetscFunctionBegin; 2504 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2505 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2506 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2507 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2508 2509 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2510 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2511 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2512 if (flg) { 2513 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2514 } else { 2515 PetscTruth assembled; 2516 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2517 if (assembled) { 2518 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2519 } 2520 } 2521 2522 x1_tmp = x1; 2523 if (!coloring->vscale){ 2524 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2525 } 2526 2527 /* 2528 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2529 coloring->F for the coarser grids from the finest 2530 */ 2531 if (coloring->F) { 2532 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2533 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2534 if (m1 != m2) { 2535 coloring->F = 0; 2536 } 2537 } 2538 2539 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2540 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2541 } 2542 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2543 2544 /* Set w1 = F(x1) */ 2545 if (coloring->F) { 2546 w1 = coloring->F; /* use already computed value of function */ 2547 coloring->F = 0; 2548 } else { 2549 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2550 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2551 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2552 } 2553 2554 if (!coloring->w3) { 2555 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2556 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2557 } 2558 w3 = coloring->w3; 2559 2560 CHKMEMQ; 2561 /* Compute all the local scale factors, including ghost points */ 2562 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2563 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2564 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2565 if (ctype == IS_COLORING_GHOSTED){ 2566 col_start = 0; col_end = N; 2567 } else if (ctype == IS_COLORING_GLOBAL){ 2568 xx = xx - start; 2569 vscale_array = vscale_array - start; 2570 col_start = start; col_end = N + start; 2571 } CHKMEMQ; 2572 for (col=col_start; col<col_end; col++){ 2573 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2574 if (coloring->htype[0] == 'w') { 2575 dx = 1.0 + unorm; 2576 } else { 2577 dx = xx[col]; 2578 } 2579 if (dx == 0.0) dx = 1.0; 2580 #if !defined(PETSC_USE_COMPLEX) 2581 if (dx < umin && dx >= 0.0) dx = umin; 2582 else if (dx < 0.0 && dx > -umin) dx = -umin; 2583 #else 2584 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2585 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2586 #endif 2587 dx *= epsilon; 2588 vscale_array[col] = 1.0/dx; 2589 } CHKMEMQ; 2590 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2591 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2592 if (ctype == IS_COLORING_GLOBAL){ 2593 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2594 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2595 } 2596 CHKMEMQ; 2597 if (coloring->vscaleforrow) { 2598 vscaleforrow = coloring->vscaleforrow; 2599 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2600 2601 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2602 /* 2603 Loop over each color 2604 */ 2605 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2606 for (k=0; k<coloring->ncolors; k++) { 2607 coloring->currentcolor = k; 2608 for (i=0; i<bs; i++) { 2609 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2610 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2611 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2612 /* 2613 Loop over each column associated with color 2614 adding the perturbation to the vector w3. 2615 */ 2616 for (l=0; l<coloring->ncolumns[k]; l++) { 2617 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2618 if (coloring->htype[0] == 'w') { 2619 dx = 1.0 + unorm; 2620 } else { 2621 dx = xx[col]; 2622 } 2623 if (dx == 0.0) dx = 1.0; 2624 #if !defined(PETSC_USE_COMPLEX) 2625 if (dx < umin && dx >= 0.0) dx = umin; 2626 else if (dx < 0.0 && dx > -umin) dx = -umin; 2627 #else 2628 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2629 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2630 #endif 2631 dx *= epsilon; 2632 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2633 w3_array[col] += dx; 2634 } 2635 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2636 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2637 2638 /* 2639 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2640 w2 = F(x1 + dx) - F(x1) 2641 */ 2642 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2643 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2644 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2645 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2646 2647 /* 2648 Loop over rows of vector, putting results into Jacobian matrix 2649 */ 2650 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2651 for (l=0; l<coloring->nrows[k]; l++) { 2652 row = bs*coloring->rows[k][l]; /* local row index */ 2653 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2654 for (j=0; j<bs; j++) { 2655 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2656 srows[j] = row + start + j; 2657 } 2658 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2659 } 2660 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2661 } 2662 } /* endof for each color */ 2663 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2664 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2665 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2666 ierr = PetscFree(srows);CHKERRQ(ierr); 2667 2668 coloring->currentcolor = -1; 2669 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2670 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2671 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 /* -------------------------------------------------------------------*/ 2676 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2677 MatGetRow_SeqBAIJ, 2678 MatRestoreRow_SeqBAIJ, 2679 MatMult_SeqBAIJ_N, 2680 /* 4*/ MatMultAdd_SeqBAIJ_N, 2681 MatMultTranspose_SeqBAIJ, 2682 MatMultTransposeAdd_SeqBAIJ, 2683 0, 2684 0, 2685 0, 2686 /*10*/ 0, 2687 MatLUFactor_SeqBAIJ, 2688 0, 2689 0, 2690 MatTranspose_SeqBAIJ, 2691 /*15*/ MatGetInfo_SeqBAIJ, 2692 MatEqual_SeqBAIJ, 2693 MatGetDiagonal_SeqBAIJ, 2694 MatDiagonalScale_SeqBAIJ, 2695 MatNorm_SeqBAIJ, 2696 /*20*/ 0, 2697 MatAssemblyEnd_SeqBAIJ, 2698 MatSetOption_SeqBAIJ, 2699 MatZeroEntries_SeqBAIJ, 2700 /*24*/ MatZeroRows_SeqBAIJ, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2706 0, 2707 0, 2708 MatGetArray_SeqBAIJ, 2709 MatRestoreArray_SeqBAIJ, 2710 /*34*/ MatDuplicate_SeqBAIJ, 2711 0, 2712 0, 2713 MatILUFactor_SeqBAIJ, 2714 0, 2715 /*39*/ MatAXPY_SeqBAIJ, 2716 MatGetSubMatrices_SeqBAIJ, 2717 MatIncreaseOverlap_SeqBAIJ, 2718 MatGetValues_SeqBAIJ, 2719 MatCopy_SeqBAIJ, 2720 /*44*/ 0, 2721 MatScale_SeqBAIJ, 2722 0, 2723 0, 2724 0, 2725 /*49*/ MatSetBlockSize_SeqBAIJ, 2726 MatGetRowIJ_SeqBAIJ, 2727 MatRestoreRowIJ_SeqBAIJ, 2728 MatGetColumnIJ_SeqBAIJ, 2729 MatRestoreColumnIJ_SeqBAIJ, 2730 /*54*/ MatFDColoringCreate_SeqAIJ, 2731 0, 2732 0, 2733 0, 2734 MatSetValuesBlocked_SeqBAIJ, 2735 /*59*/ MatGetSubMatrix_SeqBAIJ, 2736 MatDestroy_SeqBAIJ, 2737 MatView_SeqBAIJ, 2738 0, 2739 0, 2740 /*64*/ 0, 2741 0, 2742 0, 2743 0, 2744 0, 2745 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2746 0, 2747 MatConvert_Basic, 2748 0, 2749 0, 2750 /*74*/ 0, 2751 MatFDColoringApply_BAIJ, 2752 0, 2753 0, 2754 0, 2755 /*79*/ 0, 2756 0, 2757 0, 2758 0, 2759 MatLoad_SeqBAIJ, 2760 /*84*/ 0, 2761 0, 2762 0, 2763 0, 2764 0, 2765 /*89*/ 0, 2766 0, 2767 0, 2768 0, 2769 0, 2770 /*94*/ 0, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*99*/0, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*104*/0, 2781 MatRealPart_SeqBAIJ, 2782 MatImaginaryPart_SeqBAIJ, 2783 0, 2784 0, 2785 /*109*/0, 2786 0, 2787 0, 2788 0, 2789 MatMissingDiagonal_SeqBAIJ, 2790 /*114*/0, 2791 0, 2792 0, 2793 0, 2794 0, 2795 /*119*/0, 2796 0, 2797 MatMultHermitianTranspose_SeqBAIJ, 2798 MatMultHermitianTransposeAdd_SeqBAIJ, 2799 MatLoadnew_SeqBAIJ 2800 }; 2801 2802 EXTERN_C_BEGIN 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2805 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2806 { 2807 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2808 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2809 PetscErrorCode ierr; 2810 2811 PetscFunctionBegin; 2812 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2813 2814 /* allocate space for values if not already there */ 2815 if (!aij->saved_values) { 2816 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2817 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2818 } 2819 2820 /* copy values over */ 2821 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2822 PetscFunctionReturn(0); 2823 } 2824 EXTERN_C_END 2825 2826 EXTERN_C_BEGIN 2827 #undef __FUNCT__ 2828 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2829 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2830 { 2831 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2832 PetscErrorCode ierr; 2833 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2834 2835 PetscFunctionBegin; 2836 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2837 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2838 2839 /* copy values over */ 2840 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 EXTERN_C_END 2844 2845 EXTERN_C_BEGIN 2846 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2847 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2848 EXTERN_C_END 2849 2850 EXTERN_C_BEGIN 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2853 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2854 { 2855 Mat_SeqBAIJ *b; 2856 PetscErrorCode ierr; 2857 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2858 PetscTruth flg,skipallocation = PETSC_FALSE; 2859 2860 PetscFunctionBegin; 2861 2862 if (nz == MAT_SKIP_ALLOCATION) { 2863 skipallocation = PETSC_TRUE; 2864 nz = 0; 2865 } 2866 2867 if (bs < 0) { 2868 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2869 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2870 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2871 bs = PetscAbs(bs); 2872 } 2873 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2874 bs = newbs; 2875 2876 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2877 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2878 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2879 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2880 2881 B->preallocated = PETSC_TRUE; 2882 2883 mbs = B->rmap->n/bs; 2884 nbs = B->cmap->n/bs; 2885 bs2 = bs*bs; 2886 2887 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2888 2889 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2890 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2891 if (nnz) { 2892 for (i=0; i<mbs; i++) { 2893 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2894 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2895 } 2896 } 2897 2898 b = (Mat_SeqBAIJ*)B->data; 2899 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2900 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2901 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2902 2903 if (!flg) { 2904 switch (bs) { 2905 case 1: 2906 B->ops->mult = MatMult_SeqBAIJ_1; 2907 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2908 B->ops->sor = MatSOR_SeqBAIJ_1; 2909 break; 2910 case 2: 2911 B->ops->mult = MatMult_SeqBAIJ_2; 2912 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2913 B->ops->sor = MatSOR_SeqBAIJ_2; 2914 break; 2915 case 3: 2916 B->ops->mult = MatMult_SeqBAIJ_3; 2917 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2918 B->ops->sor = MatSOR_SeqBAIJ_3; 2919 break; 2920 case 4: 2921 B->ops->mult = MatMult_SeqBAIJ_4; 2922 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2923 B->ops->sor = MatSOR_SeqBAIJ_4; 2924 break; 2925 case 5: 2926 B->ops->mult = MatMult_SeqBAIJ_5; 2927 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2928 B->ops->sor = MatSOR_SeqBAIJ_5; 2929 break; 2930 case 6: 2931 B->ops->mult = MatMult_SeqBAIJ_6; 2932 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2933 B->ops->sor = MatSOR_SeqBAIJ_6; 2934 break; 2935 case 7: 2936 B->ops->mult = MatMult_SeqBAIJ_7; 2937 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2938 B->ops->sor = MatSOR_SeqBAIJ_7; 2939 break; 2940 case 15: 2941 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2942 break; 2943 default: 2944 B->ops->mult = MatMult_SeqBAIJ_N; 2945 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2946 break; 2947 } 2948 } 2949 B->rmap->bs = bs; 2950 b->mbs = mbs; 2951 b->nbs = nbs; 2952 if (!skipallocation) { 2953 if (!b->imax) { 2954 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2955 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2956 b->free_imax_ilen = PETSC_TRUE; 2957 } 2958 /* b->ilen will count nonzeros in each block row so far. */ 2959 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2960 if (!nnz) { 2961 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2962 else if (nz <= 0) nz = 1; 2963 for (i=0; i<mbs; i++) b->imax[i] = nz; 2964 nz = nz*mbs; 2965 } else { 2966 nz = 0; 2967 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2968 } 2969 2970 /* allocate the matrix space */ 2971 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2972 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2973 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2974 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2975 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2976 b->singlemalloc = PETSC_TRUE; 2977 b->i[0] = 0; 2978 for (i=1; i<mbs+1; i++) { 2979 b->i[i] = b->i[i-1] + b->imax[i-1]; 2980 } 2981 b->free_a = PETSC_TRUE; 2982 b->free_ij = PETSC_TRUE; 2983 } else { 2984 b->free_a = PETSC_FALSE; 2985 b->free_ij = PETSC_FALSE; 2986 } 2987 2988 B->rmap->bs = bs; 2989 b->bs2 = bs2; 2990 b->mbs = mbs; 2991 b->nz = 0; 2992 b->maxnz = nz*bs2; 2993 B->info.nz_unneeded = (PetscReal)b->maxnz; 2994 PetscFunctionReturn(0); 2995 } 2996 EXTERN_C_END 2997 2998 EXTERN_C_BEGIN 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3001 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3002 { 3003 PetscInt i,m,nz,nz_max=0,*nnz; 3004 PetscScalar *values=0; 3005 PetscErrorCode ierr; 3006 3007 PetscFunctionBegin; 3008 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3009 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3010 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3011 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3012 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3013 m = B->rmap->n/bs; 3014 3015 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3016 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3017 for(i=0; i<m; i++) { 3018 nz = ii[i+1]- ii[i]; 3019 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3020 nz_max = PetscMax(nz_max, nz); 3021 nnz[i] = nz; 3022 } 3023 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3024 ierr = PetscFree(nnz);CHKERRQ(ierr); 3025 3026 values = (PetscScalar*)V; 3027 if (!values) { 3028 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3029 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3030 } 3031 for (i=0; i<m; i++) { 3032 PetscInt ncols = ii[i+1] - ii[i]; 3033 const PetscInt *icols = jj + ii[i]; 3034 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3035 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3036 } 3037 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3038 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 3041 PetscFunctionReturn(0); 3042 } 3043 EXTERN_C_END 3044 3045 3046 EXTERN_C_BEGIN 3047 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3048 #if defined(PETSC_HAVE_MUMPS) 3049 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3050 #endif 3051 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3052 EXTERN_C_END 3053 3054 /*MC 3055 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3056 block sparse compressed row format. 3057 3058 Options Database Keys: 3059 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3060 3061 Level: beginner 3062 3063 .seealso: MatCreateSeqBAIJ() 3064 M*/ 3065 3066 3067 EXTERN_C_BEGIN 3068 #undef __FUNCT__ 3069 #define __FUNCT__ "MatCreate_SeqBAIJ" 3070 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3071 { 3072 PetscErrorCode ierr; 3073 PetscMPIInt size; 3074 Mat_SeqBAIJ *b; 3075 3076 PetscFunctionBegin; 3077 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3078 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3079 3080 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3081 B->data = (void*)b; 3082 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3083 B->mapping = 0; 3084 b->row = 0; 3085 b->col = 0; 3086 b->icol = 0; 3087 b->reallocs = 0; 3088 b->saved_values = 0; 3089 3090 b->roworiented = PETSC_TRUE; 3091 b->nonew = 0; 3092 b->diag = 0; 3093 b->solve_work = 0; 3094 b->mult_work = 0; 3095 B->spptr = 0; 3096 B->info.nz_unneeded = (PetscReal)b->maxnz; 3097 b->keepnonzeropattern = PETSC_FALSE; 3098 b->xtoy = 0; 3099 b->XtoY = 0; 3100 b->compressedrow.use = PETSC_FALSE; 3101 b->compressedrow.nrows = 0; 3102 b->compressedrow.i = PETSC_NULL; 3103 b->compressedrow.rindex = PETSC_NULL; 3104 b->compressedrow.checked = PETSC_FALSE; 3105 B->same_nonzero = PETSC_FALSE; 3106 3107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3108 "MatGetFactorAvailable_seqbaij_petsc", 3109 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3110 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3111 "MatGetFactor_seqbaij_petsc", 3112 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3113 #if defined(PETSC_HAVE_MUMPS) 3114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3115 #endif 3116 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3117 "MatInvertBlockDiagonal_SeqBAIJ", 3118 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3119 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3120 "MatStoreValues_SeqBAIJ", 3121 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3122 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3123 "MatRetrieveValues_SeqBAIJ", 3124 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3126 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3127 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3129 "MatConvert_SeqBAIJ_SeqAIJ", 3130 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3132 "MatConvert_SeqBAIJ_SeqSBAIJ", 3133 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3134 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3135 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3136 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3137 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3138 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3139 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3140 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3141 PetscFunctionReturn(0); 3142 } 3143 EXTERN_C_END 3144 3145 #undef __FUNCT__ 3146 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3147 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3148 { 3149 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3150 PetscErrorCode ierr; 3151 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3152 3153 PetscFunctionBegin; 3154 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3155 3156 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3157 c->imax = a->imax; 3158 c->ilen = a->ilen; 3159 c->free_imax_ilen = PETSC_FALSE; 3160 } else { 3161 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3162 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3163 for (i=0; i<mbs; i++) { 3164 c->imax[i] = a->imax[i]; 3165 c->ilen[i] = a->ilen[i]; 3166 } 3167 c->free_imax_ilen = PETSC_TRUE; 3168 } 3169 3170 /* allocate the matrix space */ 3171 if (mallocmatspace){ 3172 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3173 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3174 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3175 c->singlemalloc = PETSC_FALSE; 3176 c->free_ij = PETSC_FALSE; 3177 c->i = a->i; 3178 c->j = a->j; 3179 c->parent = A; 3180 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3181 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3182 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3183 } else { 3184 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3185 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3186 c->singlemalloc = PETSC_TRUE; 3187 c->free_ij = PETSC_TRUE; 3188 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3189 if (mbs > 0) { 3190 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3191 if (cpvalues == MAT_COPY_VALUES) { 3192 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3193 } else { 3194 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3195 } 3196 } 3197 } 3198 } 3199 3200 c->roworiented = a->roworiented; 3201 c->nonew = a->nonew; 3202 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3203 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3204 c->bs2 = a->bs2; 3205 c->mbs = a->mbs; 3206 c->nbs = a->nbs; 3207 3208 if (a->diag) { 3209 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3210 c->diag = a->diag; 3211 c->free_diag = PETSC_FALSE; 3212 } else { 3213 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3214 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3215 for (i=0; i<mbs; i++) { 3216 c->diag[i] = a->diag[i]; 3217 } 3218 c->free_diag = PETSC_TRUE; 3219 } 3220 } else c->diag = 0; 3221 c->nz = a->nz; 3222 c->maxnz = bs2*a->nz; /* Since we allocate exactly the right amount */ 3223 c->solve_work = 0; 3224 c->mult_work = 0; 3225 c->free_a = PETSC_TRUE; 3226 c->free_ij = PETSC_TRUE; 3227 C->preallocated = PETSC_TRUE; 3228 C->assembled = PETSC_TRUE; 3229 3230 c->compressedrow.use = a->compressedrow.use; 3231 c->compressedrow.nrows = a->compressedrow.nrows; 3232 c->compressedrow.checked = a->compressedrow.checked; 3233 if (a->compressedrow.checked && a->compressedrow.use){ 3234 i = a->compressedrow.nrows; 3235 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3236 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3237 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3238 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3239 } else { 3240 c->compressedrow.use = PETSC_FALSE; 3241 c->compressedrow.i = PETSC_NULL; 3242 c->compressedrow.rindex = PETSC_NULL; 3243 } 3244 C->same_nonzero = A->same_nonzero; 3245 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3246 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3247 PetscFunctionReturn(0); 3248 } 3249 3250 #undef __FUNCT__ 3251 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3252 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3253 { 3254 PetscErrorCode ierr; 3255 3256 PetscFunctionBegin; 3257 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3258 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3259 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3260 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3261 PetscFunctionReturn(0); 3262 } 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatLoad_SeqBAIJ" 3266 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3267 { 3268 Mat_SeqBAIJ *a; 3269 Mat B; 3270 PetscErrorCode ierr; 3271 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3272 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3273 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3274 PetscInt *masked,nmask,tmp,bs2,ishift; 3275 PetscMPIInt size; 3276 int fd; 3277 PetscScalar *aa; 3278 MPI_Comm comm = ((PetscObject)viewer)->comm; 3279 3280 PetscFunctionBegin; 3281 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3282 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3283 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3284 bs2 = bs*bs; 3285 3286 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3287 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3288 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3289 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3290 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3291 M = header[1]; N = header[2]; nz = header[3]; 3292 3293 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3294 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3295 3296 /* 3297 This code adds extra rows to make sure the number of rows is 3298 divisible by the blocksize 3299 */ 3300 mbs = M/bs; 3301 extra_rows = bs - M + bs*(mbs); 3302 if (extra_rows == bs) extra_rows = 0; 3303 else mbs++; 3304 if (extra_rows) { 3305 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3306 } 3307 3308 /* read in row lengths */ 3309 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3310 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3311 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3312 3313 /* read in column indices */ 3314 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3315 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3316 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3317 3318 /* loop over row lengths determining block row lengths */ 3319 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3320 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3321 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3322 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3323 rowcount = 0; 3324 nzcount = 0; 3325 for (i=0; i<mbs; i++) { 3326 nmask = 0; 3327 for (j=0; j<bs; j++) { 3328 kmax = rowlengths[rowcount]; 3329 for (k=0; k<kmax; k++) { 3330 tmp = jj[nzcount++]/bs; 3331 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3332 } 3333 rowcount++; 3334 } 3335 browlengths[i] += nmask; 3336 /* zero out the mask elements we set */ 3337 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3338 } 3339 3340 /* create our matrix */ 3341 ierr = MatCreate(comm,&B); 3342 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3343 ierr = MatSetType(B,type);CHKERRQ(ierr); 3344 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3345 a = (Mat_SeqBAIJ*)B->data; 3346 3347 /* set matrix "i" values */ 3348 a->i[0] = 0; 3349 for (i=1; i<= mbs; i++) { 3350 a->i[i] = a->i[i-1] + browlengths[i-1]; 3351 a->ilen[i-1] = browlengths[i-1]; 3352 } 3353 a->nz = 0; 3354 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3355 3356 /* read in nonzero values */ 3357 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3358 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3359 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3360 3361 /* set "a" and "j" values into matrix */ 3362 nzcount = 0; jcount = 0; 3363 for (i=0; i<mbs; i++) { 3364 nzcountb = nzcount; 3365 nmask = 0; 3366 for (j=0; j<bs; j++) { 3367 kmax = rowlengths[i*bs+j]; 3368 for (k=0; k<kmax; k++) { 3369 tmp = jj[nzcount++]/bs; 3370 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3371 } 3372 } 3373 /* sort the masked values */ 3374 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3375 3376 /* set "j" values into matrix */ 3377 maskcount = 1; 3378 for (j=0; j<nmask; j++) { 3379 a->j[jcount++] = masked[j]; 3380 mask[masked[j]] = maskcount++; 3381 } 3382 /* set "a" values into matrix */ 3383 ishift = bs2*a->i[i]; 3384 for (j=0; j<bs; j++) { 3385 kmax = rowlengths[i*bs+j]; 3386 for (k=0; k<kmax; k++) { 3387 tmp = jj[nzcountb]/bs ; 3388 block = mask[tmp] - 1; 3389 point = jj[nzcountb] - bs*tmp; 3390 idx = ishift + bs2*block + j + bs*point; 3391 a->a[idx] = (MatScalar)aa[nzcountb++]; 3392 } 3393 } 3394 /* zero out the mask elements we set */ 3395 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3396 } 3397 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3398 3399 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3400 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3401 ierr = PetscFree(aa);CHKERRQ(ierr); 3402 ierr = PetscFree(jj);CHKERRQ(ierr); 3403 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3404 3405 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3406 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3407 ierr = MatView_Private(B);CHKERRQ(ierr); 3408 3409 *A = B; 3410 PetscFunctionReturn(0); 3411 } 3412 3413 #undef __FUNCT__ 3414 #define __FUNCT__ "MatLoadnew_SeqBAIJ" 3415 PetscErrorCode MatLoadnew_SeqBAIJ(PetscViewer viewer,Mat newmat) 3416 { 3417 Mat_SeqBAIJ *a; 3418 PetscErrorCode ierr; 3419 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3420 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3421 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3422 PetscInt *masked,nmask,tmp,bs2,ishift; 3423 PetscMPIInt size; 3424 int fd; 3425 PetscScalar *aa; 3426 MPI_Comm comm = ((PetscObject)viewer)->comm; 3427 3428 PetscFunctionBegin; 3429 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3430 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3431 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3432 bs2 = bs*bs; 3433 3434 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3435 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3436 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3437 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3438 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3439 M = header[1]; N = header[2]; nz = header[3]; 3440 3441 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3442 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3443 3444 /* 3445 This code adds extra rows to make sure the number of rows is 3446 divisible by the blocksize 3447 */ 3448 mbs = M/bs; 3449 extra_rows = bs - M + bs*(mbs); 3450 if (extra_rows == bs) extra_rows = 0; 3451 else mbs++; 3452 if (extra_rows) { 3453 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3454 } 3455 3456 /* Set global sizes if not already set */ 3457 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3458 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3459 } else { /* Check if the matrix global sizes are correct */ 3460 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3461 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3462 } 3463 3464 /* read in row lengths */ 3465 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3466 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3467 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3468 3469 /* read in column indices */ 3470 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3471 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3472 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3473 3474 /* loop over row lengths determining block row lengths */ 3475 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3476 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3477 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3478 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3479 rowcount = 0; 3480 nzcount = 0; 3481 for (i=0; i<mbs; i++) { 3482 nmask = 0; 3483 for (j=0; j<bs; j++) { 3484 kmax = rowlengths[rowcount]; 3485 for (k=0; k<kmax; k++) { 3486 tmp = jj[nzcount++]/bs; 3487 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3488 } 3489 rowcount++; 3490 } 3491 browlengths[i] += nmask; 3492 /* zero out the mask elements we set */ 3493 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3494 } 3495 3496 /* Do preallocation */ 3497 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3498 a = (Mat_SeqBAIJ*)newmat->data; 3499 3500 /* set matrix "i" values */ 3501 a->i[0] = 0; 3502 for (i=1; i<= mbs; i++) { 3503 a->i[i] = a->i[i-1] + browlengths[i-1]; 3504 a->ilen[i-1] = browlengths[i-1]; 3505 } 3506 a->nz = 0; 3507 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3508 3509 /* read in nonzero values */ 3510 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3511 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3512 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3513 3514 /* set "a" and "j" values into matrix */ 3515 nzcount = 0; jcount = 0; 3516 for (i=0; i<mbs; i++) { 3517 nzcountb = nzcount; 3518 nmask = 0; 3519 for (j=0; j<bs; j++) { 3520 kmax = rowlengths[i*bs+j]; 3521 for (k=0; k<kmax; k++) { 3522 tmp = jj[nzcount++]/bs; 3523 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3524 } 3525 } 3526 /* sort the masked values */ 3527 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3528 3529 /* set "j" values into matrix */ 3530 maskcount = 1; 3531 for (j=0; j<nmask; j++) { 3532 a->j[jcount++] = masked[j]; 3533 mask[masked[j]] = maskcount++; 3534 } 3535 /* set "a" values into matrix */ 3536 ishift = bs2*a->i[i]; 3537 for (j=0; j<bs; j++) { 3538 kmax = rowlengths[i*bs+j]; 3539 for (k=0; k<kmax; k++) { 3540 tmp = jj[nzcountb]/bs ; 3541 block = mask[tmp] - 1; 3542 point = jj[nzcountb] - bs*tmp; 3543 idx = ishift + bs2*block + j + bs*point; 3544 a->a[idx] = (MatScalar)aa[nzcountb++]; 3545 } 3546 } 3547 /* zero out the mask elements we set */ 3548 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3549 } 3550 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3551 3552 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3553 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3554 ierr = PetscFree(aa);CHKERRQ(ierr); 3555 ierr = PetscFree(jj);CHKERRQ(ierr); 3556 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3557 3558 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3559 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3560 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3561 PetscFunctionReturn(0); 3562 } 3563 3564 #undef __FUNCT__ 3565 #define __FUNCT__ "MatCreateSeqBAIJ" 3566 /*@C 3567 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3568 compressed row) format. For good matrix assembly performance the 3569 user should preallocate the matrix storage by setting the parameter nz 3570 (or the array nnz). By setting these parameters accurately, performance 3571 during matrix assembly can be increased by more than a factor of 50. 3572 3573 Collective on MPI_Comm 3574 3575 Input Parameters: 3576 + comm - MPI communicator, set to PETSC_COMM_SELF 3577 . bs - size of block 3578 . m - number of rows 3579 . n - number of columns 3580 . nz - number of nonzero blocks per block row (same for all rows) 3581 - nnz - array containing the number of nonzero blocks in the various block rows 3582 (possibly different for each block row) or PETSC_NULL 3583 3584 Output Parameter: 3585 . A - the matrix 3586 3587 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3588 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3589 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3590 3591 Options Database Keys: 3592 . -mat_no_unroll - uses code that does not unroll the loops in the 3593 block calculations (much slower) 3594 . -mat_block_size - size of the blocks to use 3595 3596 Level: intermediate 3597 3598 Notes: 3599 The number of rows and columns must be divisible by blocksize. 3600 3601 If the nnz parameter is given then the nz parameter is ignored 3602 3603 A nonzero block is any block that as 1 or more nonzeros in it 3604 3605 The block AIJ format is fully compatible with standard Fortran 77 3606 storage. That is, the stored row and column indices can begin at 3607 either one (as in Fortran) or zero. See the users' manual for details. 3608 3609 Specify the preallocated storage with either nz or nnz (not both). 3610 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3611 allocation. For additional details, see the users manual chapter on 3612 matrices. 3613 3614 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3615 @*/ 3616 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3617 { 3618 PetscErrorCode ierr; 3619 3620 PetscFunctionBegin; 3621 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3622 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3623 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3624 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3625 PetscFunctionReturn(0); 3626 } 3627 3628 #undef __FUNCT__ 3629 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3630 /*@C 3631 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3632 per row in the matrix. For good matrix assembly performance the 3633 user should preallocate the matrix storage by setting the parameter nz 3634 (or the array nnz). By setting these parameters accurately, performance 3635 during matrix assembly can be increased by more than a factor of 50. 3636 3637 Collective on MPI_Comm 3638 3639 Input Parameters: 3640 + A - the matrix 3641 . bs - size of block 3642 . nz - number of block nonzeros per block row (same for all rows) 3643 - nnz - array containing the number of block nonzeros in the various block rows 3644 (possibly different for each block row) or PETSC_NULL 3645 3646 Options Database Keys: 3647 . -mat_no_unroll - uses code that does not unroll the loops in the 3648 block calculations (much slower) 3649 . -mat_block_size - size of the blocks to use 3650 3651 Level: intermediate 3652 3653 Notes: 3654 If the nnz parameter is given then the nz parameter is ignored 3655 3656 You can call MatGetInfo() to get information on how effective the preallocation was; 3657 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3658 You can also run with the option -info and look for messages with the string 3659 malloc in them to see if additional memory allocation was needed. 3660 3661 The block AIJ format is fully compatible with standard Fortran 77 3662 storage. That is, the stored row and column indices can begin at 3663 either one (as in Fortran) or zero. See the users' manual for details. 3664 3665 Specify the preallocated storage with either nz or nnz (not both). 3666 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3667 allocation. For additional details, see the users manual chapter on 3668 matrices. 3669 3670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3671 @*/ 3672 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3673 { 3674 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3675 3676 PetscFunctionBegin; 3677 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3678 if (f) { 3679 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3680 } 3681 PetscFunctionReturn(0); 3682 } 3683 3684 #undef __FUNCT__ 3685 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3686 /*@C 3687 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3688 (the default sequential PETSc format). 3689 3690 Collective on MPI_Comm 3691 3692 Input Parameters: 3693 + A - the matrix 3694 . i - the indices into j for the start of each local row (starts with zero) 3695 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3696 - v - optional values in the matrix 3697 3698 Level: developer 3699 3700 .keywords: matrix, aij, compressed row, sparse 3701 3702 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3703 @*/ 3704 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3705 { 3706 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3707 3708 PetscFunctionBegin; 3709 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3710 if (f) { 3711 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3712 } 3713 PetscFunctionReturn(0); 3714 } 3715 3716 3717 #undef __FUNCT__ 3718 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3719 /*@ 3720 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3721 (upper triangular entries in CSR format) provided by the user. 3722 3723 Collective on MPI_Comm 3724 3725 Input Parameters: 3726 + comm - must be an MPI communicator of size 1 3727 . bs - size of block 3728 . m - number of rows 3729 . n - number of columns 3730 . i - row indices 3731 . j - column indices 3732 - a - matrix values 3733 3734 Output Parameter: 3735 . mat - the matrix 3736 3737 Level: intermediate 3738 3739 Notes: 3740 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3741 once the matrix is destroyed 3742 3743 You cannot set new nonzero locations into this matrix, that will generate an error. 3744 3745 The i and j indices are 0 based 3746 3747 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3748 3749 @*/ 3750 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3751 { 3752 PetscErrorCode ierr; 3753 PetscInt ii; 3754 Mat_SeqBAIJ *baij; 3755 3756 PetscFunctionBegin; 3757 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3758 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3759 3760 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3761 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3762 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3763 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3764 baij = (Mat_SeqBAIJ*)(*mat)->data; 3765 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3766 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3767 3768 baij->i = i; 3769 baij->j = j; 3770 baij->a = a; 3771 baij->singlemalloc = PETSC_FALSE; 3772 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3773 baij->free_a = PETSC_FALSE; 3774 baij->free_ij = PETSC_FALSE; 3775 3776 for (ii=0; ii<m; ii++) { 3777 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3778 #if defined(PETSC_USE_DEBUG) 3779 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3780 #endif 3781 } 3782 #if defined(PETSC_USE_DEBUG) 3783 for (ii=0; ii<baij->i[m]; ii++) { 3784 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3785 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3786 } 3787 #endif 3788 3789 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3790 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3791 PetscFunctionReturn(0); 3792 } 3793