1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "petscblaslapack.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@ 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr,(*f)(Mat); 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 29 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 32 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 33 if (f) { 34 ierr = (*f)(mat);CHKERRQ(ierr); 35 } else { 36 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 EXTERN_C_BEGIN 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 45 { 46 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 47 PetscErrorCode ierr; 48 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5]; 49 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25]; 50 PetscReal shift = 0.0; 51 52 PetscFunctionBegin; 53 if (a->idiagvalid) PetscFunctionReturn(0); 54 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 55 diag_offset = a->diag; 56 if (!a->idiag) { 57 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 58 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 default: 129 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"not supported for block size %D",bs); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 138 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 PetscScalar *x,x1,s1; 142 const PetscScalar *b; 143 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 144 PetscErrorCode ierr; 145 PetscInt m = a->mbs,i,i2,nz,j; 146 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 147 148 PetscFunctionBegin; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 its = its*lits; 151 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 152 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 153 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 154 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 155 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 156 157 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 158 159 diag = a->diag; 160 idiag = a->idiag; 161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 162 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 163 164 if (flag & SOR_ZERO_INITIAL_GUESS) { 165 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 166 x[0] = b[0]*idiag[0]; 167 i2 = 1; 168 idiag += 1; 169 for (i=1; i<m; i++) { 170 v = aa + ai[i]; 171 vi = aj + ai[i]; 172 nz = diag[i] - ai[i]; 173 s1 = b[i2]; 174 for (j=0; j<nz; j++) { 175 s1 -= v[j]*x[vi[j]]; 176 } 177 x[i2] = idiag[0]*s1; 178 idiag += 1; 179 i2 += 1; 180 } 181 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 182 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 183 } 184 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 185 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 186 i2 = 0; 187 mdiag = a->idiag+a->mbs; 188 for (i=0; i<m; i++) { 189 x1 = x[i2]; 190 x[i2] = mdiag[0]*x1; 191 mdiag += 1; 192 i2 += 1; 193 } 194 ierr = PetscLogFlops(m);CHKERRQ(ierr); 195 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 196 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 197 } 198 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 199 idiag = a->idiag+a->mbs - 1; 200 i2 = m - 1; 201 x1 = x[i2]; 202 x[i2] = idiag[0]*x1; 203 idiag -= 1; 204 i2 -= 1; 205 for (i=m-2; i>=0; i--) { 206 v = aa + (diag[i]+1); 207 vi = aj + diag[i] + 1; 208 nz = ai[i+1] - diag[i] - 1; 209 s1 = x[i2]; 210 for (j=0; j<nz; j++) { 211 s1 -= v[j]*x[vi[j]]; 212 } 213 x[i2] = idiag[0]*s1; 214 idiag -= 1; 215 i2 -= 1; 216 } 217 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 218 } 219 } else { 220 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 221 } 222 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 223 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 #undef __FUNCT__ 228 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 229 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 230 { 231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 232 PetscScalar *x,x1,x2,s1,s2; 233 const PetscScalar *b; 234 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 235 PetscErrorCode ierr; 236 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 237 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 238 239 PetscFunctionBegin; 240 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 241 its = its*lits; 242 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 243 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 244 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 245 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 246 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 247 248 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 249 250 diag = a->diag; 251 idiag = a->idiag; 252 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 253 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 254 255 if (flag & SOR_ZERO_INITIAL_GUESS) { 256 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 257 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 258 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 259 i2 = 2; 260 idiag += 4; 261 for (i=1; i<m; i++) { 262 v = aa + 4*ai[i]; 263 vi = aj + ai[i]; 264 nz = diag[i] - ai[i]; 265 s1 = b[i2]; s2 = b[i2+1]; 266 for (j=0; j<nz; j++) { 267 idx = 2*vi[j]; 268 it = 4*j; 269 x1 = x[idx]; x2 = x[1+idx]; 270 s1 -= v[it]*x1 + v[it+2]*x2; 271 s2 -= v[it+1]*x1 + v[it+3]*x2; 272 } 273 x[i2] = idiag[0]*s1 + idiag[2]*s2; 274 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 275 idiag += 4; 276 i2 += 2; 277 } 278 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 279 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 280 } 281 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 282 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 283 i2 = 0; 284 mdiag = a->idiag+4*a->mbs; 285 for (i=0; i<m; i++) { 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 288 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 289 mdiag += 4; 290 i2 += 2; 291 } 292 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 293 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 294 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 295 } 296 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 297 idiag = a->idiag+4*a->mbs - 4; 298 i2 = 2*m - 2; 299 x1 = x[i2]; x2 = x[i2+1]; 300 x[i2] = idiag[0]*x1 + idiag[2]*x2; 301 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 302 idiag -= 4; 303 i2 -= 2; 304 for (i=m-2; i>=0; i--) { 305 v = aa + 4*(diag[i]+1); 306 vi = aj + diag[i] + 1; 307 nz = ai[i+1] - diag[i] - 1; 308 s1 = x[i2]; s2 = x[i2+1]; 309 for (j=0; j<nz; j++) { 310 idx = 2*vi[j]; 311 it = 4*j; 312 x1 = x[idx]; x2 = x[1+idx]; 313 s1 -= v[it]*x1 + v[it+2]*x2; 314 s2 -= v[it+1]*x1 + v[it+3]*x2; 315 } 316 x[i2] = idiag[0]*s1 + idiag[2]*s2; 317 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 318 idiag -= 4; 319 i2 -= 2; 320 } 321 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 322 } 323 } else { 324 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 325 } 326 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 327 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 333 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 334 { 335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 336 PetscScalar *x,x1,x2,x3,s1,s2,s3; 337 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 338 const PetscScalar *b; 339 PetscErrorCode ierr; 340 PetscInt m = a->mbs,i,i2,nz,idx; 341 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 342 343 PetscFunctionBegin; 344 its = its*lits; 345 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 346 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 347 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 348 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 349 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 350 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 351 352 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 353 354 diag = a->diag; 355 idiag = a->idiag; 356 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 358 359 if (flag & SOR_ZERO_INITIAL_GUESS) { 360 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 361 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 362 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 363 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 364 i2 = 3; 365 idiag += 9; 366 for (i=1; i<m; i++) { 367 v = aa + 9*ai[i]; 368 vi = aj + ai[i]; 369 nz = diag[i] - ai[i]; 370 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 371 while (nz--) { 372 idx = 3*(*vi++); 373 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 374 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 375 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 376 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 377 v += 9; 378 } 379 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 380 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 381 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 382 idiag += 9; 383 i2 += 3; 384 } 385 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 386 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 387 } 388 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 389 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 390 i2 = 0; 391 mdiag = a->idiag+9*a->mbs; 392 for (i=0; i<m; i++) { 393 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 394 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 395 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 396 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 397 mdiag += 9; 398 i2 += 3; 399 } 400 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 401 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 402 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 403 } 404 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 405 idiag = a->idiag+9*a->mbs - 9; 406 i2 = 3*m - 3; 407 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 408 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 409 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 410 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 411 idiag -= 9; 412 i2 -= 3; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 9*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 418 while (nz--) { 419 idx = 3*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 421 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 422 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 423 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 424 v += 9; 425 } 426 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 427 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 428 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 429 idiag -= 9; 430 i2 -= 3; 431 } 432 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 433 } 434 } else { 435 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 436 } 437 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 438 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 444 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 445 { 446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 447 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 448 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 449 const PetscScalar *b; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 461 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 476 i2 = 4; 477 idiag += 16; 478 for (i=1; i<m; i++) { 479 v = aa + 16*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 483 while (nz--) { 484 idx = 4*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 486 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 487 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 488 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 489 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 490 v += 16; 491 } 492 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 493 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 494 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 495 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 496 idiag += 16; 497 i2 += 4; 498 } 499 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 500 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 501 } 502 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 503 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 504 i2 = 0; 505 mdiag = a->idiag+16*a->mbs; 506 for (i=0; i<m; i++) { 507 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 508 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 509 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 510 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 511 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 512 mdiag += 16; 513 i2 += 4; 514 } 515 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 516 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 517 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 518 } 519 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 520 idiag = a->idiag+16*a->mbs - 16; 521 i2 = 4*m - 4; 522 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 523 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 524 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 525 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 526 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 527 idiag -= 16; 528 i2 -= 4; 529 for (i=m-2; i>=0; i--) { 530 v = aa + 16*(diag[i]+1); 531 vi = aj + diag[i] + 1; 532 nz = ai[i+1] - diag[i] - 1; 533 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 534 while (nz--) { 535 idx = 4*(*vi++); 536 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 537 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 538 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 539 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 540 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 541 v += 16; 542 } 543 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 544 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 545 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 546 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 547 idiag -= 16; 548 i2 -= 4; 549 } 550 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 551 } 552 } else { 553 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 554 } 555 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 556 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 562 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 563 { 564 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 565 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 566 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 567 const PetscScalar *b; 568 PetscErrorCode ierr; 569 PetscInt m = a->mbs,i,i2,nz,idx; 570 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 571 572 PetscFunctionBegin; 573 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 574 its = its*lits; 575 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 576 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 577 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 578 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 579 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 580 581 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 582 583 diag = a->diag; 584 idiag = a->idiag; 585 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 586 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 587 588 if (flag & SOR_ZERO_INITIAL_GUESS) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 590 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 591 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 592 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 593 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 594 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 595 i2 = 5; 596 idiag += 25; 597 for (i=1; i<m; i++) { 598 v = aa + 25*ai[i]; 599 vi = aj + ai[i]; 600 nz = diag[i] - ai[i]; 601 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 602 while (nz--) { 603 idx = 5*(*vi++); 604 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 605 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 606 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 607 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 608 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 609 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 610 v += 25; 611 } 612 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 613 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 614 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 615 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 616 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 617 idiag += 25; 618 i2 += 5; 619 } 620 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 621 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 622 } 623 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 624 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 625 i2 = 0; 626 mdiag = a->idiag+25*a->mbs; 627 for (i=0; i<m; i++) { 628 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 629 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 630 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 631 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 632 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 633 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 634 mdiag += 25; 635 i2 += 5; 636 } 637 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 638 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 639 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 640 } 641 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 642 idiag = a->idiag+25*a->mbs - 25; 643 i2 = 5*m - 5; 644 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 645 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 646 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 647 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 648 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 649 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 650 idiag -= 25; 651 i2 -= 5; 652 for (i=m-2; i>=0; i--) { 653 v = aa + 25*(diag[i]+1); 654 vi = aj + diag[i] + 1; 655 nz = ai[i+1] - diag[i] - 1; 656 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 657 while (nz--) { 658 idx = 5*(*vi++); 659 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 660 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 661 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 662 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 663 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 664 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 665 v += 25; 666 } 667 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 668 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 669 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 670 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 671 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 672 idiag -= 25; 673 i2 -= 5; 674 } 675 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 676 } 677 } else { 678 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 679 } 680 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 681 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 687 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 691 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 692 const PetscScalar *b; 693 PetscErrorCode ierr; 694 PetscInt m = a->mbs,i,i2,nz,idx; 695 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 696 697 PetscFunctionBegin; 698 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 699 its = its*lits; 700 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 701 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 702 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 703 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 704 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 705 706 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 707 708 diag = a->diag; 709 idiag = a->idiag; 710 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 711 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 712 713 if (flag & SOR_ZERO_INITIAL_GUESS) { 714 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 715 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 716 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 717 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 718 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 719 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 720 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 721 i2 = 6; 722 idiag += 36; 723 for (i=1; i<m; i++) { 724 v = aa + 36*ai[i]; 725 vi = aj + ai[i]; 726 nz = diag[i] - ai[i]; 727 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 728 while (nz--) { 729 idx = 6*(*vi++); 730 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 731 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 732 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 733 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 734 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 735 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 736 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 737 v += 36; 738 } 739 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 740 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 741 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 742 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 743 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 744 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 745 idiag += 36; 746 i2 += 6; 747 } 748 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 749 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 750 } 751 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 752 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 753 i2 = 0; 754 mdiag = a->idiag+36*a->mbs; 755 for (i=0; i<m; i++) { 756 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 757 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 758 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 759 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 760 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 761 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 762 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 763 mdiag += 36; 764 i2 += 6; 765 } 766 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 767 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 768 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 769 } 770 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 771 idiag = a->idiag+36*a->mbs - 36; 772 i2 = 6*m - 6; 773 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 774 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 775 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 776 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 777 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 778 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 779 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 780 idiag -= 36; 781 i2 -= 6; 782 for (i=m-2; i>=0; i--) { 783 v = aa + 36*(diag[i]+1); 784 vi = aj + diag[i] + 1; 785 nz = ai[i+1] - diag[i] - 1; 786 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 787 while (nz--) { 788 idx = 6*(*vi++); 789 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 790 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 791 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 792 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 793 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 794 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 795 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 796 v += 36; 797 } 798 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 799 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 800 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 801 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 802 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 803 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 804 idiag -= 36; 805 i2 -= 6; 806 } 807 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 808 } 809 } else { 810 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 811 } 812 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 813 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 819 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 820 { 821 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 822 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 823 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 824 const PetscScalar *b; 825 PetscErrorCode ierr; 826 PetscInt m = a->mbs,i,i2,nz,idx; 827 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 828 829 PetscFunctionBegin; 830 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 831 its = its*lits; 832 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 833 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 834 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 835 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 836 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 837 838 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 839 840 diag = a->diag; 841 idiag = a->idiag; 842 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 843 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 844 845 if (flag & SOR_ZERO_INITIAL_GUESS) { 846 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 847 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 848 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 849 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 850 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 851 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 852 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 853 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 854 i2 = 7; 855 idiag += 49; 856 for (i=1; i<m; i++) { 857 v = aa + 49*ai[i]; 858 vi = aj + ai[i]; 859 nz = diag[i] - ai[i]; 860 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 861 while (nz--) { 862 idx = 7*(*vi++); 863 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 864 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 865 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 866 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 867 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 868 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 869 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 870 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 871 v += 49; 872 } 873 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 874 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 875 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 876 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 877 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 878 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 879 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 880 idiag += 49; 881 i2 += 7; 882 } 883 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 884 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 885 } 886 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 887 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 888 i2 = 0; 889 mdiag = a->idiag+49*a->mbs; 890 for (i=0; i<m; i++) { 891 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 892 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 893 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 894 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 895 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 896 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 897 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 898 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 899 mdiag += 36; 900 i2 += 6; 901 } 902 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 903 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 904 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 905 } 906 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 907 idiag = a->idiag+49*a->mbs - 49; 908 i2 = 7*m - 7; 909 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 910 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 911 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 912 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 913 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 914 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 915 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 916 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 917 idiag -= 49; 918 i2 -= 7; 919 for (i=m-2; i>=0; i--) { 920 v = aa + 49*(diag[i]+1); 921 vi = aj + diag[i] + 1; 922 nz = ai[i+1] - diag[i] - 1; 923 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 924 while (nz--) { 925 idx = 7*(*vi++); 926 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 927 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 928 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 929 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 930 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 931 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 932 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 933 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 934 v += 49; 935 } 936 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 937 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 938 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 939 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 940 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 941 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 942 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 943 idiag -= 49; 944 i2 -= 7; 945 } 946 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 947 } 948 } else { 949 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 950 } 951 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 952 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 /* 957 Special version for direct calls from Fortran (Used in PETSc-fun3d) 958 */ 959 #if defined(PETSC_HAVE_FORTRAN_CAPS) 960 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 961 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 962 #define matsetvaluesblocked4_ matsetvaluesblocked4 963 #endif 964 965 EXTERN_C_BEGIN 966 #undef __FUNCT__ 967 #define __FUNCT__ "matsetvaluesblocked4_" 968 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 969 { 970 Mat A = *AA; 971 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 972 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 973 PetscInt *ai=a->i,*ailen=a->ilen; 974 PetscInt *aj=a->j,stepval,lastcol = -1; 975 const PetscScalar *value = v; 976 MatScalar *ap,*aa = a->a,*bap; 977 978 PetscFunctionBegin; 979 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 980 stepval = (n-1)*4; 981 for (k=0; k<m; k++) { /* loop over added rows */ 982 row = im[k]; 983 rp = aj + ai[row]; 984 ap = aa + 16*ai[row]; 985 nrow = ailen[row]; 986 low = 0; 987 high = nrow; 988 for (l=0; l<n; l++) { /* loop over added columns */ 989 col = in[l]; 990 if (col <= lastcol) low = 0; else high = nrow; 991 lastcol = col; 992 value = v + k*(stepval+4 + l)*4; 993 while (high-low > 7) { 994 t = (low+high)/2; 995 if (rp[t] > col) high = t; 996 else low = t; 997 } 998 for (i=low; i<high; i++) { 999 if (rp[i] > col) break; 1000 if (rp[i] == col) { 1001 bap = ap + 16*i; 1002 for (ii=0; ii<4; ii++,value+=stepval) { 1003 for (jj=ii; jj<16; jj+=4) { 1004 bap[jj] += *value++; 1005 } 1006 } 1007 goto noinsert2; 1008 } 1009 } 1010 N = nrow++ - 1; 1011 high++; /* added new column index thus must search to one higher than before */ 1012 /* shift up all the later entries in this row */ 1013 for (ii=N; ii>=i; ii--) { 1014 rp[ii+1] = rp[ii]; 1015 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1016 } 1017 if (N >= i) { 1018 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1019 } 1020 rp[i] = col; 1021 bap = ap + 16*i; 1022 for (ii=0; ii<4; ii++,value+=stepval) { 1023 for (jj=ii; jj<16; jj+=4) { 1024 bap[jj] = *value++; 1025 } 1026 } 1027 noinsert2:; 1028 low = i; 1029 } 1030 ailen[row] = nrow; 1031 } 1032 PetscFunctionReturnVoid(); 1033 } 1034 EXTERN_C_END 1035 1036 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1037 #define matsetvalues4_ MATSETVALUES4 1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1039 #define matsetvalues4_ matsetvalues4 1040 #endif 1041 1042 EXTERN_C_BEGIN 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatSetValues4_" 1045 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1046 { 1047 Mat A = *AA; 1048 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1049 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1050 PetscInt *ai=a->i,*ailen=a->ilen; 1051 PetscInt *aj=a->j,brow,bcol; 1052 PetscInt ridx,cidx,lastcol = -1; 1053 MatScalar *ap,value,*aa=a->a,*bap; 1054 1055 PetscFunctionBegin; 1056 for (k=0; k<m; k++) { /* loop over added rows */ 1057 row = im[k]; brow = row/4; 1058 rp = aj + ai[brow]; 1059 ap = aa + 16*ai[brow]; 1060 nrow = ailen[brow]; 1061 low = 0; 1062 high = nrow; 1063 for (l=0; l<n; l++) { /* loop over added columns */ 1064 col = in[l]; bcol = col/4; 1065 ridx = row % 4; cidx = col % 4; 1066 value = v[l + k*n]; 1067 if (col <= lastcol) low = 0; else high = nrow; 1068 lastcol = col; 1069 while (high-low > 7) { 1070 t = (low+high)/2; 1071 if (rp[t] > bcol) high = t; 1072 else low = t; 1073 } 1074 for (i=low; i<high; i++) { 1075 if (rp[i] > bcol) break; 1076 if (rp[i] == bcol) { 1077 bap = ap + 16*i + 4*cidx + ridx; 1078 *bap += value; 1079 goto noinsert1; 1080 } 1081 } 1082 N = nrow++ - 1; 1083 high++; /* added new column thus must search to one higher than before */ 1084 /* shift up all the later entries in this row */ 1085 for (ii=N; ii>=i; ii--) { 1086 rp[ii+1] = rp[ii]; 1087 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1088 } 1089 if (N>=i) { 1090 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1091 } 1092 rp[i] = bcol; 1093 ap[16*i + 4*cidx + ridx] = value; 1094 noinsert1:; 1095 low = i; 1096 } 1097 ailen[brow] = nrow; 1098 } 1099 PetscFunctionReturnVoid(); 1100 } 1101 EXTERN_C_END 1102 1103 /* 1104 Checks for missing diagonals 1105 */ 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1108 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1109 { 1110 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 PetscInt *diag,*jj = a->j,i; 1113 1114 PetscFunctionBegin; 1115 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1116 *missing = PETSC_FALSE; 1117 if (A->rmap->n > 0 && !jj) { 1118 *missing = PETSC_TRUE; 1119 if (d) *d = 0; 1120 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1121 } else { 1122 diag = a->diag; 1123 for (i=0; i<a->mbs; i++) { 1124 if (jj[diag[i]] != i) { 1125 *missing = PETSC_TRUE; 1126 if (d) *d = i; 1127 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1128 } 1129 } 1130 } 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1136 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1137 { 1138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1139 PetscErrorCode ierr; 1140 PetscInt i,j,m = a->mbs; 1141 1142 PetscFunctionBegin; 1143 if (!a->diag) { 1144 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1145 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1146 a->free_diag = PETSC_TRUE; 1147 } 1148 for (i=0; i<m; i++) { 1149 a->diag[i] = a->i[i+1]; 1150 for (j=a->i[i]; j<a->i[i+1]; j++) { 1151 if (a->j[j] == i) { 1152 a->diag[i] = j; 1153 break; 1154 } 1155 } 1156 } 1157 PetscFunctionReturn(0); 1158 } 1159 1160 1161 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1165 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1166 { 1167 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1168 PetscErrorCode ierr; 1169 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1170 PetscInt *tia, *tja; 1171 1172 PetscFunctionBegin; 1173 *nn = n; 1174 if (!ia) PetscFunctionReturn(0); 1175 if (symmetric) { 1176 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1177 nz = tia[n]; 1178 } else { 1179 tia = a->i; tja = a->j; 1180 } 1181 1182 if (!blockcompressed && bs > 1) { 1183 (*nn) *= bs; 1184 /* malloc & create the natural set of indices */ 1185 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1186 if (n) { 1187 (*ia)[0] = 0; 1188 for (j=1; j<bs; j++) { 1189 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1190 } 1191 } 1192 1193 for (i=1; i<n; i++) { 1194 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1195 for (j=1; j<bs; j++) { 1196 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1197 } 1198 } 1199 if (n) { 1200 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1201 } 1202 1203 if (ja) { 1204 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1205 cnt = 0; 1206 for (i=0; i<n; i++) { 1207 for (j=0; j<bs; j++) { 1208 for (k=tia[i]; k<tia[i+1]; k++) { 1209 for (l=0; l<bs; l++) { 1210 (*ja)[cnt++] = bs*tja[k] + l; 1211 } 1212 } 1213 } 1214 } 1215 } 1216 1217 n *= bs; 1218 nz *= bs*bs; 1219 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1220 ierr = PetscFree(tia);CHKERRQ(ierr); 1221 ierr = PetscFree(tja);CHKERRQ(ierr); 1222 } 1223 } else if (oshift == 1) { 1224 if (symmetric) { 1225 PetscInt nz = tia[A->rmap->n/bs]; 1226 /* add 1 to i and j indices */ 1227 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1228 *ia = tia; 1229 if (ja) { 1230 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1231 *ja = tja; 1232 } 1233 } else { 1234 PetscInt nz = a->i[A->rmap->n/bs]; 1235 /* malloc space and add 1 to i and j indices */ 1236 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1237 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1238 if (ja) { 1239 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1240 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1241 } 1242 } 1243 } else { 1244 *ia = tia; 1245 if (ja) *ja = tja; 1246 } 1247 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1253 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1254 { 1255 PetscErrorCode ierr; 1256 1257 PetscFunctionBegin; 1258 if (!ia) PetscFunctionReturn(0); 1259 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1260 ierr = PetscFree(*ia);CHKERRQ(ierr); 1261 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1262 } 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1268 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1269 { 1270 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 #if defined(PETSC_USE_LOG) 1275 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1276 #endif 1277 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1278 if (a->row) { 1279 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1280 } 1281 if (a->col) { 1282 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1283 } 1284 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1285 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1286 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1287 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1288 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1289 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1290 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1291 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1292 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1293 1294 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1295 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1296 ierr = PetscFree(a);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1312 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 switch (op) { 1319 case MAT_ROW_ORIENTED: 1320 a->roworiented = flg; 1321 break; 1322 case MAT_KEEP_NONZERO_PATTERN: 1323 a->keepnonzeropattern = flg; 1324 break; 1325 case MAT_NEW_NONZERO_LOCATIONS: 1326 a->nonew = (flg ? 0 : 1); 1327 break; 1328 case MAT_NEW_NONZERO_LOCATION_ERR: 1329 a->nonew = (flg ? -1 : 0); 1330 break; 1331 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1332 a->nonew = (flg ? -2 : 0); 1333 break; 1334 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1335 a->nounused = (flg ? -1 : 0); 1336 break; 1337 case MAT_NEW_DIAGONALS: 1338 case MAT_IGNORE_OFF_PROC_ENTRIES: 1339 case MAT_USE_HASH_TABLE: 1340 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1341 break; 1342 case MAT_SYMMETRIC: 1343 case MAT_STRUCTURALLY_SYMMETRIC: 1344 case MAT_HERMITIAN: 1345 case MAT_SYMMETRY_ETERNAL: 1346 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1347 break; 1348 default: 1349 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1350 } 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1357 { 1358 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1359 PetscErrorCode ierr; 1360 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1361 MatScalar *aa,*aa_i; 1362 PetscScalar *v_i; 1363 1364 PetscFunctionBegin; 1365 bs = A->rmap->bs; 1366 ai = a->i; 1367 aj = a->j; 1368 aa = a->a; 1369 bs2 = a->bs2; 1370 1371 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1372 1373 bn = row/bs; /* Block number */ 1374 bp = row % bs; /* Block Position */ 1375 M = ai[bn+1] - ai[bn]; 1376 *nz = bs*M; 1377 1378 if (v) { 1379 *v = 0; 1380 if (*nz) { 1381 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1382 for (i=0; i<M; i++) { /* for each block in the block row */ 1383 v_i = *v + i*bs; 1384 aa_i = aa + bs2*(ai[bn] + i); 1385 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1386 } 1387 } 1388 } 1389 1390 if (idx) { 1391 *idx = 0; 1392 if (*nz) { 1393 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1394 for (i=0; i<M; i++) { /* for each block in the block row */ 1395 idx_i = *idx + i*bs; 1396 itmp = bs*aj[ai[bn] + i]; 1397 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1398 } 1399 } 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1406 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1407 { 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1412 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1413 PetscFunctionReturn(0); 1414 } 1415 1416 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1420 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1421 { 1422 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1423 Mat C; 1424 PetscErrorCode ierr; 1425 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1426 PetscInt *rows,*cols,bs2=a->bs2; 1427 MatScalar *array; 1428 1429 PetscFunctionBegin; 1430 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1431 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1432 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1433 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1434 1435 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1436 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1437 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1438 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1439 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1440 ierr = PetscFree(col);CHKERRQ(ierr); 1441 } else { 1442 C = *B; 1443 } 1444 1445 array = a->a; 1446 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1447 for (i=0; i<mbs; i++) { 1448 cols[0] = i*bs; 1449 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1450 len = ai[i+1] - ai[i]; 1451 for (j=0; j<len; j++) { 1452 rows[0] = (*aj++)*bs; 1453 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1454 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1455 array += bs2; 1456 } 1457 } 1458 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1459 1460 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1461 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1462 1463 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1464 *B = C; 1465 } else { 1466 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1478 int fd; 1479 PetscScalar *aa; 1480 FILE *file; 1481 1482 PetscFunctionBegin; 1483 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1484 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1485 col_lens[0] = MAT_FILE_CLASSID; 1486 1487 col_lens[1] = A->rmap->N; 1488 col_lens[2] = A->cmap->n; 1489 col_lens[3] = a->nz*bs2; 1490 1491 /* store lengths of each row and write (including header) to file */ 1492 count = 0; 1493 for (i=0; i<a->mbs; i++) { 1494 for (j=0; j<bs; j++) { 1495 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1496 } 1497 } 1498 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1499 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1500 1501 /* store column indices (zero start index) */ 1502 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1503 count = 0; 1504 for (i=0; i<a->mbs; i++) { 1505 for (j=0; j<bs; j++) { 1506 for (k=a->i[i]; k<a->i[i+1]; k++) { 1507 for (l=0; l<bs; l++) { 1508 jj[count++] = bs*a->j[k] + l; 1509 } 1510 } 1511 } 1512 } 1513 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1514 ierr = PetscFree(jj);CHKERRQ(ierr); 1515 1516 /* store nonzero values */ 1517 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1518 count = 0; 1519 for (i=0; i<a->mbs; i++) { 1520 for (j=0; j<bs; j++) { 1521 for (k=a->i[i]; k<a->i[i+1]; k++) { 1522 for (l=0; l<bs; l++) { 1523 aa[count++] = a->a[bs2*k + l*bs + j]; 1524 } 1525 } 1526 } 1527 } 1528 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1529 ierr = PetscFree(aa);CHKERRQ(ierr); 1530 1531 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1532 if (file) { 1533 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1541 { 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1543 PetscErrorCode ierr; 1544 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1545 PetscViewerFormat format; 1546 1547 PetscFunctionBegin; 1548 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1549 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1550 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1551 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1552 Mat aij; 1553 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1554 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1555 ierr = MatDestroy(aij);CHKERRQ(ierr); 1556 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1557 PetscFunctionReturn(0); 1558 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1560 for (i=0; i<a->mbs; i++) { 1561 for (j=0; j<bs; j++) { 1562 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1563 for (k=a->i[i]; k<a->i[i+1]; k++) { 1564 for (l=0; l<bs; l++) { 1565 #if defined(PETSC_USE_COMPLEX) 1566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1568 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1571 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1572 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } 1575 #else 1576 if (a->a[bs2*k + l*bs + j] != 0.0) { 1577 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1578 } 1579 #endif 1580 } 1581 } 1582 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1583 } 1584 } 1585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1586 } else { 1587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1588 for (i=0; i<a->mbs; i++) { 1589 for (j=0; j<bs; j++) { 1590 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1591 for (k=a->i[i]; k<a->i[i+1]; k++) { 1592 for (l=0; l<bs; l++) { 1593 #if defined(PETSC_USE_COMPLEX) 1594 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1595 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1596 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1597 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1598 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1599 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1600 } else { 1601 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1602 } 1603 #else 1604 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1605 #endif 1606 } 1607 } 1608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1609 } 1610 } 1611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1612 } 1613 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1620 { 1621 Mat A = (Mat) Aa; 1622 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1623 PetscErrorCode ierr; 1624 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1625 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1626 MatScalar *aa; 1627 PetscViewer viewer; 1628 1629 PetscFunctionBegin; 1630 1631 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1632 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1633 1634 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1635 1636 /* loop over matrix elements drawing boxes */ 1637 color = PETSC_DRAW_BLUE; 1638 for (i=0,row=0; i<mbs; i++,row+=bs) { 1639 for (j=a->i[i]; j<a->i[i+1]; j++) { 1640 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1641 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1642 aa = a->a + j*bs2; 1643 for (k=0; k<bs; k++) { 1644 for (l=0; l<bs; l++) { 1645 if (PetscRealPart(*aa++) >= 0.) continue; 1646 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1647 } 1648 } 1649 } 1650 } 1651 color = PETSC_DRAW_CYAN; 1652 for (i=0,row=0; i<mbs; i++,row+=bs) { 1653 for (j=a->i[i]; j<a->i[i+1]; j++) { 1654 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1655 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1656 aa = a->a + j*bs2; 1657 for (k=0; k<bs; k++) { 1658 for (l=0; l<bs; l++) { 1659 if (PetscRealPart(*aa++) != 0.) continue; 1660 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1661 } 1662 } 1663 } 1664 } 1665 1666 color = PETSC_DRAW_RED; 1667 for (i=0,row=0; i<mbs; i++,row+=bs) { 1668 for (j=a->i[i]; j<a->i[i+1]; j++) { 1669 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1670 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1671 aa = a->a + j*bs2; 1672 for (k=0; k<bs; k++) { 1673 for (l=0; l<bs; l++) { 1674 if (PetscRealPart(*aa++) <= 0.) continue; 1675 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1676 } 1677 } 1678 } 1679 } 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1686 { 1687 PetscErrorCode ierr; 1688 PetscReal xl,yl,xr,yr,w,h; 1689 PetscDraw draw; 1690 PetscTruth isnull; 1691 1692 PetscFunctionBegin; 1693 1694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1696 1697 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1698 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1699 xr += w; yr += h; xl = -w; yl = -h; 1700 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1701 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1702 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatView_SeqBAIJ" 1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1709 { 1710 PetscErrorCode ierr; 1711 PetscTruth iascii,isbinary,isdraw; 1712 1713 PetscFunctionBegin; 1714 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1715 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1717 if (iascii){ 1718 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1719 } else if (isbinary) { 1720 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1721 } else if (isdraw) { 1722 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1723 } else { 1724 Mat B; 1725 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1726 ierr = MatView(B,viewer);CHKERRQ(ierr); 1727 ierr = MatDestroy(B);CHKERRQ(ierr); 1728 } 1729 PetscFunctionReturn(0); 1730 } 1731 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1736 { 1737 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1738 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1739 PetscInt *ai = a->i,*ailen = a->ilen; 1740 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1741 MatScalar *ap,*aa = a->a; 1742 1743 PetscFunctionBegin; 1744 for (k=0; k<m; k++) { /* loop over rows */ 1745 row = im[k]; brow = row/bs; 1746 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1747 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1748 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1749 nrow = ailen[brow]; 1750 for (l=0; l<n; l++) { /* loop over columns */ 1751 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1752 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1753 col = in[l] ; 1754 bcol = col/bs; 1755 cidx = col%bs; 1756 ridx = row%bs; 1757 high = nrow; 1758 low = 0; /* assume unsorted */ 1759 while (high-low > 5) { 1760 t = (low+high)/2; 1761 if (rp[t] > bcol) high = t; 1762 else low = t; 1763 } 1764 for (i=low; i<high; i++) { 1765 if (rp[i] > bcol) break; 1766 if (rp[i] == bcol) { 1767 *v++ = ap[bs2*i+bs*cidx+ridx]; 1768 goto finished; 1769 } 1770 } 1771 *v++ = 0.0; 1772 finished:; 1773 } 1774 } 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1780 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1781 { 1782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1783 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1784 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1785 PetscErrorCode ierr; 1786 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1787 PetscTruth roworiented=a->roworiented; 1788 const PetscScalar *value = v; 1789 MatScalar *ap,*aa = a->a,*bap; 1790 1791 PetscFunctionBegin; 1792 if (roworiented) { 1793 stepval = (n-1)*bs; 1794 } else { 1795 stepval = (m-1)*bs; 1796 } 1797 for (k=0; k<m; k++) { /* loop over added rows */ 1798 row = im[k]; 1799 if (row < 0) continue; 1800 #if defined(PETSC_USE_DEBUG) 1801 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1802 #endif 1803 rp = aj + ai[row]; 1804 ap = aa + bs2*ai[row]; 1805 rmax = imax[row]; 1806 nrow = ailen[row]; 1807 low = 0; 1808 high = nrow; 1809 for (l=0; l<n; l++) { /* loop over added columns */ 1810 if (in[l] < 0) continue; 1811 #if defined(PETSC_USE_DEBUG) 1812 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1813 #endif 1814 col = in[l]; 1815 if (roworiented) { 1816 value = v + k*(stepval+bs)*bs + l*bs; 1817 } else { 1818 value = v + l*(stepval+bs)*bs + k*bs; 1819 } 1820 if (col <= lastcol) low = 0; else high = nrow; 1821 lastcol = col; 1822 while (high-low > 7) { 1823 t = (low+high)/2; 1824 if (rp[t] > col) high = t; 1825 else low = t; 1826 } 1827 for (i=low; i<high; i++) { 1828 if (rp[i] > col) break; 1829 if (rp[i] == col) { 1830 bap = ap + bs2*i; 1831 if (roworiented) { 1832 if (is == ADD_VALUES) { 1833 for (ii=0; ii<bs; ii++,value+=stepval) { 1834 for (jj=ii; jj<bs2; jj+=bs) { 1835 bap[jj] += *value++; 1836 } 1837 } 1838 } else { 1839 for (ii=0; ii<bs; ii++,value+=stepval) { 1840 for (jj=ii; jj<bs2; jj+=bs) { 1841 bap[jj] = *value++; 1842 } 1843 } 1844 } 1845 } else { 1846 if (is == ADD_VALUES) { 1847 for (ii=0; ii<bs; ii++,value+=stepval) { 1848 for (jj=0; jj<bs; jj++) { 1849 *bap++ += *value++; 1850 } 1851 } 1852 } else { 1853 for (ii=0; ii<bs; ii++,value+=stepval) { 1854 for (jj=0; jj<bs; jj++) { 1855 *bap++ = *value++; 1856 } 1857 } 1858 } 1859 } 1860 goto noinsert2; 1861 } 1862 } 1863 if (nonew == 1) goto noinsert2; 1864 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1865 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1866 N = nrow++ - 1; high++; 1867 /* shift up all the later entries in this row */ 1868 for (ii=N; ii>=i; ii--) { 1869 rp[ii+1] = rp[ii]; 1870 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1871 } 1872 if (N >= i) { 1873 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1874 } 1875 rp[i] = col; 1876 bap = ap + bs2*i; 1877 if (roworiented) { 1878 for (ii=0; ii<bs; ii++,value+=stepval) { 1879 for (jj=ii; jj<bs2; jj+=bs) { 1880 bap[jj] = *value++; 1881 } 1882 } 1883 } else { 1884 for (ii=0; ii<bs; ii++,value+=stepval) { 1885 for (jj=0; jj<bs; jj++) { 1886 *bap++ = *value++; 1887 } 1888 } 1889 } 1890 noinsert2:; 1891 low = i; 1892 } 1893 ailen[row] = nrow; 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1900 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1901 { 1902 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1903 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1904 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1905 PetscErrorCode ierr; 1906 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1907 MatScalar *aa = a->a,*ap; 1908 PetscReal ratio=0.6; 1909 1910 PetscFunctionBegin; 1911 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1912 1913 if (m) rmax = ailen[0]; 1914 for (i=1; i<mbs; i++) { 1915 /* move each row back by the amount of empty slots (fshift) before it*/ 1916 fshift += imax[i-1] - ailen[i-1]; 1917 rmax = PetscMax(rmax,ailen[i]); 1918 if (fshift) { 1919 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1920 N = ailen[i]; 1921 for (j=0; j<N; j++) { 1922 ip[j-fshift] = ip[j]; 1923 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1924 } 1925 } 1926 ai[i] = ai[i-1] + ailen[i-1]; 1927 } 1928 if (mbs) { 1929 fshift += imax[mbs-1] - ailen[mbs-1]; 1930 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1931 } 1932 /* reset ilen and imax for each row */ 1933 for (i=0; i<mbs; i++) { 1934 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1935 } 1936 a->nz = ai[mbs]; 1937 1938 /* diagonals may have moved, so kill the diagonal pointers */ 1939 a->idiagvalid = PETSC_FALSE; 1940 if (fshift && a->diag) { 1941 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1942 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1943 a->diag = 0; 1944 } 1945 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1946 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1947 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1948 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1949 A->info.mallocs += a->reallocs; 1950 a->reallocs = 0; 1951 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1952 1953 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1954 if (a->compressedrow.use){ 1955 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1956 } 1957 1958 A->same_nonzero = PETSC_TRUE; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 /* 1963 This function returns an array of flags which indicate the locations of contiguous 1964 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1965 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1966 Assume: sizes should be long enough to hold all the values. 1967 */ 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1970 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1971 { 1972 PetscInt i,j,k,row; 1973 PetscTruth flg; 1974 1975 PetscFunctionBegin; 1976 for (i=0,j=0; i<n; j++) { 1977 row = idx[i]; 1978 if (row%bs!=0) { /* Not the begining of a block */ 1979 sizes[j] = 1; 1980 i++; 1981 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1982 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1983 i++; 1984 } else { /* Begining of the block, so check if the complete block exists */ 1985 flg = PETSC_TRUE; 1986 for (k=1; k<bs; k++) { 1987 if (row+k != idx[i+k]) { /* break in the block */ 1988 flg = PETSC_FALSE; 1989 break; 1990 } 1991 } 1992 if (flg) { /* No break in the bs */ 1993 sizes[j] = bs; 1994 i+= bs; 1995 } else { 1996 sizes[j] = 1; 1997 i++; 1998 } 1999 } 2000 } 2001 *bs_max = j; 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2007 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2008 { 2009 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 PetscInt i,j,k,count,*rows; 2012 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2013 PetscScalar zero = 0.0; 2014 MatScalar *aa; 2015 2016 PetscFunctionBegin; 2017 /* Make a copy of the IS and sort it */ 2018 /* allocate memory for rows,sizes */ 2019 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2020 2021 /* copy IS values to rows, and sort them */ 2022 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2023 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2024 if (baij->keepnonzeropattern) { 2025 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2026 bs_max = is_n; 2027 A->same_nonzero = PETSC_TRUE; 2028 } else { 2029 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2030 A->same_nonzero = PETSC_FALSE; 2031 } 2032 2033 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2034 row = rows[j]; 2035 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2036 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2037 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2038 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2039 if (diag != 0.0) { 2040 if (baij->ilen[row/bs] > 0) { 2041 baij->ilen[row/bs] = 1; 2042 baij->j[baij->i[row/bs]] = row/bs; 2043 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2044 } 2045 /* Now insert all the diagonal values for this bs */ 2046 for (k=0; k<bs; k++) { 2047 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2048 } 2049 } else { /* (diag == 0.0) */ 2050 baij->ilen[row/bs] = 0; 2051 } /* end (diag == 0.0) */ 2052 } else { /* (sizes[i] != bs) */ 2053 #if defined (PETSC_USE_DEBUG) 2054 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2055 #endif 2056 for (k=0; k<count; k++) { 2057 aa[0] = zero; 2058 aa += bs; 2059 } 2060 if (diag != 0.0) { 2061 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2062 } 2063 } 2064 } 2065 2066 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2067 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 PetscFunctionReturn(0); 2069 } 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2073 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2074 { 2075 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2076 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2077 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2078 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2079 PetscErrorCode ierr; 2080 PetscInt ridx,cidx,bs2=a->bs2; 2081 PetscTruth roworiented=a->roworiented; 2082 MatScalar *ap,value,*aa=a->a,*bap; 2083 2084 PetscFunctionBegin; 2085 if (v) PetscValidScalarPointer(v,6); 2086 for (k=0; k<m; k++) { /* loop over added rows */ 2087 row = im[k]; 2088 brow = row/bs; 2089 if (row < 0) continue; 2090 #if defined(PETSC_USE_DEBUG) 2091 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2092 #endif 2093 rp = aj + ai[brow]; 2094 ap = aa + bs2*ai[brow]; 2095 rmax = imax[brow]; 2096 nrow = ailen[brow]; 2097 low = 0; 2098 high = nrow; 2099 for (l=0; l<n; l++) { /* loop over added columns */ 2100 if (in[l] < 0) continue; 2101 #if defined(PETSC_USE_DEBUG) 2102 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2103 #endif 2104 col = in[l]; bcol = col/bs; 2105 ridx = row % bs; cidx = col % bs; 2106 if (roworiented) { 2107 value = v[l + k*n]; 2108 } else { 2109 value = v[k + l*m]; 2110 } 2111 if (col <= lastcol) low = 0; else high = nrow; 2112 lastcol = col; 2113 while (high-low > 7) { 2114 t = (low+high)/2; 2115 if (rp[t] > bcol) high = t; 2116 else low = t; 2117 } 2118 for (i=low; i<high; i++) { 2119 if (rp[i] > bcol) break; 2120 if (rp[i] == bcol) { 2121 bap = ap + bs2*i + bs*cidx + ridx; 2122 if (is == ADD_VALUES) *bap += value; 2123 else *bap = value; 2124 goto noinsert1; 2125 } 2126 } 2127 if (nonew == 1) goto noinsert1; 2128 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2129 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2130 N = nrow++ - 1; high++; 2131 /* shift up all the later entries in this row */ 2132 for (ii=N; ii>=i; ii--) { 2133 rp[ii+1] = rp[ii]; 2134 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2135 } 2136 if (N>=i) { 2137 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2138 } 2139 rp[i] = bcol; 2140 ap[bs2*i + bs*cidx + ridx] = value; 2141 a->nz++; 2142 noinsert1:; 2143 low = i; 2144 } 2145 ailen[brow] = nrow; 2146 } 2147 A->same_nonzero = PETSC_FALSE; 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2153 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2154 { 2155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2156 Mat outA; 2157 PetscErrorCode ierr; 2158 PetscTruth row_identity,col_identity; 2159 2160 PetscFunctionBegin; 2161 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2162 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2163 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2164 if (!row_identity || !col_identity) { 2165 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2166 } 2167 2168 outA = inA; 2169 inA->factortype = MAT_FACTOR_LU; 2170 2171 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2172 2173 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2174 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2175 a->row = row; 2176 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2177 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2178 a->col = col; 2179 2180 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2181 if (a->icol) { 2182 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2183 } 2184 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2185 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2186 2187 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2188 if (!a->solve_work) { 2189 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2190 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2191 } 2192 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2193 2194 PetscFunctionReturn(0); 2195 } 2196 2197 EXTERN_C_BEGIN 2198 #undef __FUNCT__ 2199 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2200 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2201 { 2202 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2203 PetscInt i,nz,mbs; 2204 2205 PetscFunctionBegin; 2206 nz = baij->maxnz/baij->bs2; 2207 mbs = baij->mbs; 2208 for (i=0; i<nz; i++) { 2209 baij->j[i] = indices[i]; 2210 } 2211 baij->nz = nz; 2212 for (i=0; i<mbs; i++) { 2213 baij->ilen[i] = baij->imax[i]; 2214 } 2215 PetscFunctionReturn(0); 2216 } 2217 EXTERN_C_END 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2221 /*@ 2222 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2223 in the matrix. 2224 2225 Input Parameters: 2226 + mat - the SeqBAIJ matrix 2227 - indices - the column indices 2228 2229 Level: advanced 2230 2231 Notes: 2232 This can be called if you have precomputed the nonzero structure of the 2233 matrix and want to provide it to the matrix object to improve the performance 2234 of the MatSetValues() operation. 2235 2236 You MUST have set the correct numbers of nonzeros per row in the call to 2237 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2238 2239 MUST be called before any calls to MatSetValues(); 2240 2241 @*/ 2242 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2243 { 2244 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2245 2246 PetscFunctionBegin; 2247 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2248 PetscValidPointer(indices,2); 2249 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2250 if (f) { 2251 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2252 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2258 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2259 { 2260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2261 PetscErrorCode ierr; 2262 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2263 PetscReal atmp; 2264 PetscScalar *x,zero = 0.0; 2265 MatScalar *aa; 2266 PetscInt ncols,brow,krow,kcol; 2267 2268 PetscFunctionBegin; 2269 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2270 bs = A->rmap->bs; 2271 aa = a->a; 2272 ai = a->i; 2273 aj = a->j; 2274 mbs = a->mbs; 2275 2276 ierr = VecSet(v,zero);CHKERRQ(ierr); 2277 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2278 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2279 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2280 for (i=0; i<mbs; i++) { 2281 ncols = ai[1] - ai[0]; ai++; 2282 brow = bs*i; 2283 for (j=0; j<ncols; j++){ 2284 for (kcol=0; kcol<bs; kcol++){ 2285 for (krow=0; krow<bs; krow++){ 2286 atmp = PetscAbsScalar(*aa);aa++; 2287 row = brow + krow; /* row index */ 2288 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2289 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2290 } 2291 } 2292 aj++; 2293 } 2294 } 2295 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2296 PetscFunctionReturn(0); 2297 } 2298 2299 #undef __FUNCT__ 2300 #define __FUNCT__ "MatCopy_SeqBAIJ" 2301 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2302 { 2303 PetscErrorCode ierr; 2304 2305 PetscFunctionBegin; 2306 /* If the two matrices have the same copy implementation, use fast copy. */ 2307 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2308 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2309 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2310 2311 if (a->i[A->rmap->N] != b->i[B->rmap->N]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2312 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2313 } else { 2314 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2315 } 2316 PetscFunctionReturn(0); 2317 } 2318 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2321 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2322 { 2323 PetscErrorCode ierr; 2324 2325 PetscFunctionBegin; 2326 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2327 PetscFunctionReturn(0); 2328 } 2329 2330 #undef __FUNCT__ 2331 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2332 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2333 { 2334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2335 PetscFunctionBegin; 2336 *array = a->a; 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2342 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2343 { 2344 PetscFunctionBegin; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2350 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2351 { 2352 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2353 PetscErrorCode ierr; 2354 PetscInt i,bs=Y->rmap->bs,j,bs2; 2355 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2356 2357 PetscFunctionBegin; 2358 if (str == SAME_NONZERO_PATTERN) { 2359 PetscScalar alpha = a; 2360 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2361 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2362 if (y->xtoy && y->XtoY != X) { 2363 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2364 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2365 } 2366 if (!y->xtoy) { /* get xtoy */ 2367 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2368 y->XtoY = X; 2369 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2370 } 2371 bs2 = bs*bs; 2372 for (i=0; i<x->nz; i++) { 2373 j = 0; 2374 while (j < bs2){ 2375 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2376 j++; 2377 } 2378 } 2379 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2380 } else { 2381 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2382 } 2383 PetscFunctionReturn(0); 2384 } 2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2388 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2389 { 2390 PetscInt rbs,cbs; 2391 PetscErrorCode ierr; 2392 2393 PetscFunctionBegin; 2394 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2395 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2396 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2397 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2403 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2404 { 2405 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2406 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2407 MatScalar *aa = a->a; 2408 2409 PetscFunctionBegin; 2410 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 #undef __FUNCT__ 2415 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2416 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2417 { 2418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2419 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2420 MatScalar *aa = a->a; 2421 2422 PetscFunctionBegin; 2423 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2424 PetscFunctionReturn(0); 2425 } 2426 2427 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2431 /* 2432 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2433 */ 2434 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2435 { 2436 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2437 PetscErrorCode ierr; 2438 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2439 PetscInt nz = a->i[m],row,*jj,mr,col; 2440 2441 PetscFunctionBegin; 2442 *nn = n; 2443 if (!ia) PetscFunctionReturn(0); 2444 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2445 else { 2446 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2447 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2448 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2449 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2450 jj = a->j; 2451 for (i=0; i<nz; i++) { 2452 collengths[jj[i]]++; 2453 } 2454 cia[0] = oshift; 2455 for (i=0; i<n; i++) { 2456 cia[i+1] = cia[i] + collengths[i]; 2457 } 2458 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2459 jj = a->j; 2460 for (row=0; row<m; row++) { 2461 mr = a->i[row+1] - a->i[row]; 2462 for (i=0; i<mr; i++) { 2463 col = *jj++; 2464 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2465 } 2466 } 2467 ierr = PetscFree(collengths);CHKERRQ(ierr); 2468 *ia = cia; *ja = cja; 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2475 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2476 { 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 if (!ia) PetscFunctionReturn(0); 2481 ierr = PetscFree(*ia);CHKERRQ(ierr); 2482 ierr = PetscFree(*ja);CHKERRQ(ierr); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2488 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2489 { 2490 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2491 PetscErrorCode ierr; 2492 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2493 PetscScalar dx,*y,*xx,*w3_array; 2494 PetscScalar *vscale_array; 2495 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2496 Vec w1=coloring->w1,w2=coloring->w2,w3; 2497 void *fctx = coloring->fctx; 2498 PetscTruth flg = PETSC_FALSE; 2499 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2500 Vec x1_tmp; 2501 2502 PetscFunctionBegin; 2503 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2504 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2505 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2506 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2507 2508 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2509 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2510 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2511 if (flg) { 2512 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2513 } else { 2514 PetscTruth assembled; 2515 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2516 if (assembled) { 2517 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2518 } 2519 } 2520 2521 x1_tmp = x1; 2522 if (!coloring->vscale){ 2523 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2524 } 2525 2526 /* 2527 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2528 coloring->F for the coarser grids from the finest 2529 */ 2530 if (coloring->F) { 2531 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2532 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2533 if (m1 != m2) { 2534 coloring->F = 0; 2535 } 2536 } 2537 2538 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2539 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2540 } 2541 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2542 2543 /* Set w1 = F(x1) */ 2544 if (coloring->F) { 2545 w1 = coloring->F; /* use already computed value of function */ 2546 coloring->F = 0; 2547 } else { 2548 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2549 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2550 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2551 } 2552 2553 if (!coloring->w3) { 2554 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2555 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2556 } 2557 w3 = coloring->w3; 2558 2559 CHKMEMQ; 2560 /* Compute all the local scale factors, including ghost points */ 2561 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2562 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2563 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2564 if (ctype == IS_COLORING_GHOSTED){ 2565 col_start = 0; col_end = N; 2566 } else if (ctype == IS_COLORING_GLOBAL){ 2567 xx = xx - start; 2568 vscale_array = vscale_array - start; 2569 col_start = start; col_end = N + start; 2570 } CHKMEMQ; 2571 for (col=col_start; col<col_end; col++){ 2572 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2573 if (coloring->htype[0] == 'w') { 2574 dx = 1.0 + unorm; 2575 } else { 2576 dx = xx[col]; 2577 } 2578 if (dx == 0.0) dx = 1.0; 2579 #if !defined(PETSC_USE_COMPLEX) 2580 if (dx < umin && dx >= 0.0) dx = umin; 2581 else if (dx < 0.0 && dx > -umin) dx = -umin; 2582 #else 2583 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2584 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2585 #endif 2586 dx *= epsilon; 2587 vscale_array[col] = 1.0/dx; 2588 } CHKMEMQ; 2589 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2590 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2591 if (ctype == IS_COLORING_GLOBAL){ 2592 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2593 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2594 } 2595 CHKMEMQ; 2596 if (coloring->vscaleforrow) { 2597 vscaleforrow = coloring->vscaleforrow; 2598 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2599 2600 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2601 /* 2602 Loop over each color 2603 */ 2604 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2605 for (k=0; k<coloring->ncolors; k++) { 2606 coloring->currentcolor = k; 2607 for (i=0; i<bs; i++) { 2608 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2609 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2610 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2611 /* 2612 Loop over each column associated with color 2613 adding the perturbation to the vector w3. 2614 */ 2615 for (l=0; l<coloring->ncolumns[k]; l++) { 2616 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2617 if (coloring->htype[0] == 'w') { 2618 dx = 1.0 + unorm; 2619 } else { 2620 dx = xx[col]; 2621 } 2622 if (dx == 0.0) dx = 1.0; 2623 #if !defined(PETSC_USE_COMPLEX) 2624 if (dx < umin && dx >= 0.0) dx = umin; 2625 else if (dx < 0.0 && dx > -umin) dx = -umin; 2626 #else 2627 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2628 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2629 #endif 2630 dx *= epsilon; 2631 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2632 w3_array[col] += dx; 2633 } 2634 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2635 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2636 2637 /* 2638 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2639 w2 = F(x1 + dx) - F(x1) 2640 */ 2641 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2642 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2643 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2644 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2645 2646 /* 2647 Loop over rows of vector, putting results into Jacobian matrix 2648 */ 2649 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2650 for (l=0; l<coloring->nrows[k]; l++) { 2651 row = bs*coloring->rows[k][l]; /* local row index */ 2652 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2653 for (j=0; j<bs; j++) { 2654 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2655 srows[j] = row + start + j; 2656 } 2657 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2658 } 2659 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2660 } 2661 } /* endof for each color */ 2662 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2663 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2664 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2665 ierr = PetscFree(srows);CHKERRQ(ierr); 2666 2667 coloring->currentcolor = -1; 2668 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2669 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2670 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2671 PetscFunctionReturn(0); 2672 } 2673 2674 /* -------------------------------------------------------------------*/ 2675 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2676 MatGetRow_SeqBAIJ, 2677 MatRestoreRow_SeqBAIJ, 2678 MatMult_SeqBAIJ_N, 2679 /* 4*/ MatMultAdd_SeqBAIJ_N, 2680 MatMultTranspose_SeqBAIJ, 2681 MatMultTransposeAdd_SeqBAIJ, 2682 0, 2683 0, 2684 0, 2685 /*10*/ 0, 2686 MatLUFactor_SeqBAIJ, 2687 0, 2688 0, 2689 MatTranspose_SeqBAIJ, 2690 /*15*/ MatGetInfo_SeqBAIJ, 2691 MatEqual_SeqBAIJ, 2692 MatGetDiagonal_SeqBAIJ, 2693 MatDiagonalScale_SeqBAIJ, 2694 MatNorm_SeqBAIJ, 2695 /*20*/ 0, 2696 MatAssemblyEnd_SeqBAIJ, 2697 MatSetOption_SeqBAIJ, 2698 MatZeroEntries_SeqBAIJ, 2699 /*24*/ MatZeroRows_SeqBAIJ, 2700 0, 2701 0, 2702 0, 2703 0, 2704 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2705 0, 2706 0, 2707 MatGetArray_SeqBAIJ, 2708 MatRestoreArray_SeqBAIJ, 2709 /*34*/ MatDuplicate_SeqBAIJ, 2710 0, 2711 0, 2712 MatILUFactor_SeqBAIJ, 2713 0, 2714 /*39*/ MatAXPY_SeqBAIJ, 2715 MatGetSubMatrices_SeqBAIJ, 2716 MatIncreaseOverlap_SeqBAIJ, 2717 MatGetValues_SeqBAIJ, 2718 MatCopy_SeqBAIJ, 2719 /*44*/ 0, 2720 MatScale_SeqBAIJ, 2721 0, 2722 0, 2723 0, 2724 /*49*/ MatSetBlockSize_SeqBAIJ, 2725 MatGetRowIJ_SeqBAIJ, 2726 MatRestoreRowIJ_SeqBAIJ, 2727 MatGetColumnIJ_SeqBAIJ, 2728 MatRestoreColumnIJ_SeqBAIJ, 2729 /*54*/ MatFDColoringCreate_SeqAIJ, 2730 0, 2731 0, 2732 0, 2733 MatSetValuesBlocked_SeqBAIJ, 2734 /*59*/ MatGetSubMatrix_SeqBAIJ, 2735 MatDestroy_SeqBAIJ, 2736 MatView_SeqBAIJ, 2737 0, 2738 0, 2739 /*64*/ 0, 2740 0, 2741 0, 2742 0, 2743 0, 2744 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2745 0, 2746 MatConvert_Basic, 2747 0, 2748 0, 2749 /*74*/ 0, 2750 MatFDColoringApply_BAIJ, 2751 0, 2752 0, 2753 0, 2754 /*79*/ 0, 2755 0, 2756 0, 2757 0, 2758 0, 2759 /*84*/ 0, 2760 0, 2761 0, 2762 0, 2763 0, 2764 /*89*/ 0, 2765 0, 2766 0, 2767 0, 2768 0, 2769 /*94*/ 0, 2770 0, 2771 0, 2772 0, 2773 0, 2774 /*99*/0, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*104*/0, 2780 MatRealPart_SeqBAIJ, 2781 MatImaginaryPart_SeqBAIJ, 2782 0, 2783 0, 2784 /*109*/0, 2785 0, 2786 0, 2787 0, 2788 MatMissingDiagonal_SeqBAIJ, 2789 /*114*/0, 2790 0, 2791 0, 2792 0, 2793 0, 2794 /*119*/0, 2795 0, 2796 MatMultHermitianTranspose_SeqBAIJ, 2797 MatMultHermitianTransposeAdd_SeqBAIJ, 2798 MatLoadnew_SeqBAIJ 2799 }; 2800 2801 EXTERN_C_BEGIN 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2804 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2805 { 2806 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2807 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2812 2813 /* allocate space for values if not already there */ 2814 if (!aij->saved_values) { 2815 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2816 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2817 } 2818 2819 /* copy values over */ 2820 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2821 PetscFunctionReturn(0); 2822 } 2823 EXTERN_C_END 2824 2825 EXTERN_C_BEGIN 2826 #undef __FUNCT__ 2827 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2828 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2829 { 2830 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2831 PetscErrorCode ierr; 2832 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2833 2834 PetscFunctionBegin; 2835 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2836 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2837 2838 /* copy values over */ 2839 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2840 PetscFunctionReturn(0); 2841 } 2842 EXTERN_C_END 2843 2844 EXTERN_C_BEGIN 2845 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2846 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2847 EXTERN_C_END 2848 2849 EXTERN_C_BEGIN 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2852 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2853 { 2854 Mat_SeqBAIJ *b; 2855 PetscErrorCode ierr; 2856 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2857 PetscTruth flg,skipallocation = PETSC_FALSE; 2858 2859 PetscFunctionBegin; 2860 2861 if (nz == MAT_SKIP_ALLOCATION) { 2862 skipallocation = PETSC_TRUE; 2863 nz = 0; 2864 } 2865 2866 if (bs < 0) { 2867 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2868 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2869 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2870 bs = PetscAbs(bs); 2871 } 2872 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2873 bs = newbs; 2874 2875 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2876 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2877 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2878 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2879 2880 B->preallocated = PETSC_TRUE; 2881 2882 mbs = B->rmap->n/bs; 2883 nbs = B->cmap->n/bs; 2884 bs2 = bs*bs; 2885 2886 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2887 2888 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2889 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2890 if (nnz) { 2891 for (i=0; i<mbs; i++) { 2892 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2893 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2894 } 2895 } 2896 2897 b = (Mat_SeqBAIJ*)B->data; 2898 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2899 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2900 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2901 2902 if (!flg) { 2903 switch (bs) { 2904 case 1: 2905 B->ops->mult = MatMult_SeqBAIJ_1; 2906 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2907 B->ops->sor = MatSOR_SeqBAIJ_1; 2908 break; 2909 case 2: 2910 B->ops->mult = MatMult_SeqBAIJ_2; 2911 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2912 B->ops->sor = MatSOR_SeqBAIJ_2; 2913 break; 2914 case 3: 2915 B->ops->mult = MatMult_SeqBAIJ_3; 2916 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2917 B->ops->sor = MatSOR_SeqBAIJ_3; 2918 break; 2919 case 4: 2920 B->ops->mult = MatMult_SeqBAIJ_4; 2921 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2922 B->ops->sor = MatSOR_SeqBAIJ_4; 2923 break; 2924 case 5: 2925 B->ops->mult = MatMult_SeqBAIJ_5; 2926 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2927 B->ops->sor = MatSOR_SeqBAIJ_5; 2928 break; 2929 case 6: 2930 B->ops->mult = MatMult_SeqBAIJ_6; 2931 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2932 B->ops->sor = MatSOR_SeqBAIJ_6; 2933 break; 2934 case 7: 2935 B->ops->mult = MatMult_SeqBAIJ_7; 2936 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2937 B->ops->sor = MatSOR_SeqBAIJ_7; 2938 break; 2939 case 15: 2940 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2941 break; 2942 default: 2943 B->ops->mult = MatMult_SeqBAIJ_N; 2944 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2945 break; 2946 } 2947 } 2948 B->rmap->bs = bs; 2949 b->mbs = mbs; 2950 b->nbs = nbs; 2951 if (!skipallocation) { 2952 if (!b->imax) { 2953 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2954 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2955 b->free_imax_ilen = PETSC_TRUE; 2956 } 2957 /* b->ilen will count nonzeros in each block row so far. */ 2958 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2959 if (!nnz) { 2960 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2961 else if (nz <= 0) nz = 1; 2962 for (i=0; i<mbs; i++) b->imax[i] = nz; 2963 nz = nz*mbs; 2964 } else { 2965 nz = 0; 2966 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2967 } 2968 2969 /* allocate the matrix space */ 2970 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2971 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2972 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2973 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2974 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2975 b->singlemalloc = PETSC_TRUE; 2976 b->i[0] = 0; 2977 for (i=1; i<mbs+1; i++) { 2978 b->i[i] = b->i[i-1] + b->imax[i-1]; 2979 } 2980 b->free_a = PETSC_TRUE; 2981 b->free_ij = PETSC_TRUE; 2982 } else { 2983 b->free_a = PETSC_FALSE; 2984 b->free_ij = PETSC_FALSE; 2985 } 2986 2987 B->rmap->bs = bs; 2988 b->bs2 = bs2; 2989 b->mbs = mbs; 2990 b->nz = 0; 2991 b->maxnz = nz*bs2; 2992 B->info.nz_unneeded = (PetscReal)b->maxnz; 2993 PetscFunctionReturn(0); 2994 } 2995 EXTERN_C_END 2996 2997 EXTERN_C_BEGIN 2998 #undef __FUNCT__ 2999 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3000 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3001 { 3002 PetscInt i,m,nz,nz_max=0,*nnz; 3003 PetscScalar *values=0; 3004 PetscErrorCode ierr; 3005 3006 PetscFunctionBegin; 3007 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3008 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3009 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3010 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3011 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3012 m = B->rmap->n/bs; 3013 3014 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3015 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3016 for(i=0; i<m; i++) { 3017 nz = ii[i+1]- ii[i]; 3018 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3019 nz_max = PetscMax(nz_max, nz); 3020 nnz[i] = nz; 3021 } 3022 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3023 ierr = PetscFree(nnz);CHKERRQ(ierr); 3024 3025 values = (PetscScalar*)V; 3026 if (!values) { 3027 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3028 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3029 } 3030 for (i=0; i<m; i++) { 3031 PetscInt ncols = ii[i+1] - ii[i]; 3032 const PetscInt *icols = jj + ii[i]; 3033 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3034 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3035 } 3036 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3037 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3038 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 3040 PetscFunctionReturn(0); 3041 } 3042 EXTERN_C_END 3043 3044 3045 EXTERN_C_BEGIN 3046 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3047 #if defined(PETSC_HAVE_MUMPS) 3048 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3049 #endif 3050 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3051 EXTERN_C_END 3052 3053 /*MC 3054 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3055 block sparse compressed row format. 3056 3057 Options Database Keys: 3058 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3059 3060 Level: beginner 3061 3062 .seealso: MatCreateSeqBAIJ() 3063 M*/ 3064 3065 3066 EXTERN_C_BEGIN 3067 #undef __FUNCT__ 3068 #define __FUNCT__ "MatCreate_SeqBAIJ" 3069 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3070 { 3071 PetscErrorCode ierr; 3072 PetscMPIInt size; 3073 Mat_SeqBAIJ *b; 3074 3075 PetscFunctionBegin; 3076 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3077 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3078 3079 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3080 B->data = (void*)b; 3081 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3082 B->mapping = 0; 3083 b->row = 0; 3084 b->col = 0; 3085 b->icol = 0; 3086 b->reallocs = 0; 3087 b->saved_values = 0; 3088 3089 b->roworiented = PETSC_TRUE; 3090 b->nonew = 0; 3091 b->diag = 0; 3092 b->solve_work = 0; 3093 b->mult_work = 0; 3094 B->spptr = 0; 3095 B->info.nz_unneeded = (PetscReal)b->maxnz; 3096 b->keepnonzeropattern = PETSC_FALSE; 3097 b->xtoy = 0; 3098 b->XtoY = 0; 3099 b->compressedrow.use = PETSC_FALSE; 3100 b->compressedrow.nrows = 0; 3101 b->compressedrow.i = PETSC_NULL; 3102 b->compressedrow.rindex = PETSC_NULL; 3103 b->compressedrow.checked = PETSC_FALSE; 3104 B->same_nonzero = PETSC_FALSE; 3105 3106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3107 "MatGetFactorAvailable_seqbaij_petsc", 3108 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3110 "MatGetFactor_seqbaij_petsc", 3111 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3112 #if defined(PETSC_HAVE_MUMPS) 3113 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3114 #endif 3115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3116 "MatInvertBlockDiagonal_SeqBAIJ", 3117 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3118 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3119 "MatStoreValues_SeqBAIJ", 3120 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3121 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3122 "MatRetrieveValues_SeqBAIJ", 3123 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3124 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3125 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3126 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3127 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3128 "MatConvert_SeqBAIJ_SeqAIJ", 3129 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3130 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3131 "MatConvert_SeqBAIJ_SeqSBAIJ", 3132 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3133 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3134 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3135 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3136 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3137 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3138 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3139 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3140 PetscFunctionReturn(0); 3141 } 3142 EXTERN_C_END 3143 3144 #undef __FUNCT__ 3145 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3146 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3147 { 3148 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3149 PetscErrorCode ierr; 3150 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3151 3152 PetscFunctionBegin; 3153 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3154 3155 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3156 c->imax = a->imax; 3157 c->ilen = a->ilen; 3158 c->free_imax_ilen = PETSC_FALSE; 3159 } else { 3160 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3161 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3162 for (i=0; i<mbs; i++) { 3163 c->imax[i] = a->imax[i]; 3164 c->ilen[i] = a->ilen[i]; 3165 } 3166 c->free_imax_ilen = PETSC_TRUE; 3167 } 3168 3169 /* allocate the matrix space */ 3170 if (mallocmatspace){ 3171 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3172 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3173 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3174 c->singlemalloc = PETSC_FALSE; 3175 c->free_ij = PETSC_FALSE; 3176 c->i = a->i; 3177 c->j = a->j; 3178 c->parent = A; 3179 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3180 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3181 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3182 } else { 3183 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3184 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3185 c->singlemalloc = PETSC_TRUE; 3186 c->free_ij = PETSC_TRUE; 3187 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3188 if (mbs > 0) { 3189 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3190 if (cpvalues == MAT_COPY_VALUES) { 3191 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3192 } else { 3193 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3194 } 3195 } 3196 } 3197 } 3198 3199 c->roworiented = a->roworiented; 3200 c->nonew = a->nonew; 3201 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3202 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3203 c->bs2 = a->bs2; 3204 c->mbs = a->mbs; 3205 c->nbs = a->nbs; 3206 3207 if (a->diag) { 3208 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3209 c->diag = a->diag; 3210 c->free_diag = PETSC_FALSE; 3211 } else { 3212 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3213 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3214 for (i=0; i<mbs; i++) { 3215 c->diag[i] = a->diag[i]; 3216 } 3217 c->free_diag = PETSC_TRUE; 3218 } 3219 } else c->diag = 0; 3220 c->nz = a->nz; 3221 c->maxnz = bs2*a->nz; /* Since we allocate exactly the right amount */ 3222 c->solve_work = 0; 3223 c->mult_work = 0; 3224 c->free_a = PETSC_TRUE; 3225 c->free_ij = PETSC_TRUE; 3226 C->preallocated = PETSC_TRUE; 3227 C->assembled = PETSC_TRUE; 3228 3229 c->compressedrow.use = a->compressedrow.use; 3230 c->compressedrow.nrows = a->compressedrow.nrows; 3231 c->compressedrow.checked = a->compressedrow.checked; 3232 if (a->compressedrow.checked && a->compressedrow.use){ 3233 i = a->compressedrow.nrows; 3234 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3235 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3236 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3237 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3238 } else { 3239 c->compressedrow.use = PETSC_FALSE; 3240 c->compressedrow.i = PETSC_NULL; 3241 c->compressedrow.rindex = PETSC_NULL; 3242 } 3243 C->same_nonzero = A->same_nonzero; 3244 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3245 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3246 PetscFunctionReturn(0); 3247 } 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3251 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3252 { 3253 PetscErrorCode ierr; 3254 3255 PetscFunctionBegin; 3256 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3257 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3258 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3259 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3260 PetscFunctionReturn(0); 3261 } 3262 3263 #undef __FUNCT__ 3264 #define __FUNCT__ "MatLoad_SeqBAIJ" 3265 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3266 { 3267 Mat_SeqBAIJ *a; 3268 Mat B; 3269 PetscErrorCode ierr; 3270 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3271 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3272 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3273 PetscInt *masked,nmask,tmp,bs2,ishift; 3274 PetscMPIInt size; 3275 int fd; 3276 PetscScalar *aa; 3277 MPI_Comm comm = ((PetscObject)viewer)->comm; 3278 3279 PetscFunctionBegin; 3280 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3281 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3282 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3283 bs2 = bs*bs; 3284 3285 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3286 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3287 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3288 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3289 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3290 M = header[1]; N = header[2]; nz = header[3]; 3291 3292 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3293 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3294 3295 /* 3296 This code adds extra rows to make sure the number of rows is 3297 divisible by the blocksize 3298 */ 3299 mbs = M/bs; 3300 extra_rows = bs - M + bs*(mbs); 3301 if (extra_rows == bs) extra_rows = 0; 3302 else mbs++; 3303 if (extra_rows) { 3304 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3305 } 3306 3307 /* read in row lengths */ 3308 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3309 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3310 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3311 3312 /* read in column indices */ 3313 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3314 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3315 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3316 3317 /* loop over row lengths determining block row lengths */ 3318 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3319 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3320 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3321 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3322 rowcount = 0; 3323 nzcount = 0; 3324 for (i=0; i<mbs; i++) { 3325 nmask = 0; 3326 for (j=0; j<bs; j++) { 3327 kmax = rowlengths[rowcount]; 3328 for (k=0; k<kmax; k++) { 3329 tmp = jj[nzcount++]/bs; 3330 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3331 } 3332 rowcount++; 3333 } 3334 browlengths[i] += nmask; 3335 /* zero out the mask elements we set */ 3336 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3337 } 3338 3339 /* create our matrix */ 3340 ierr = MatCreate(comm,&B); 3341 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3342 ierr = MatSetType(B,type);CHKERRQ(ierr); 3343 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3344 a = (Mat_SeqBAIJ*)B->data; 3345 3346 /* set matrix "i" values */ 3347 a->i[0] = 0; 3348 for (i=1; i<= mbs; i++) { 3349 a->i[i] = a->i[i-1] + browlengths[i-1]; 3350 a->ilen[i-1] = browlengths[i-1]; 3351 } 3352 a->nz = 0; 3353 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3354 3355 /* read in nonzero values */ 3356 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3357 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3358 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3359 3360 /* set "a" and "j" values into matrix */ 3361 nzcount = 0; jcount = 0; 3362 for (i=0; i<mbs; i++) { 3363 nzcountb = nzcount; 3364 nmask = 0; 3365 for (j=0; j<bs; j++) { 3366 kmax = rowlengths[i*bs+j]; 3367 for (k=0; k<kmax; k++) { 3368 tmp = jj[nzcount++]/bs; 3369 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3370 } 3371 } 3372 /* sort the masked values */ 3373 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3374 3375 /* set "j" values into matrix */ 3376 maskcount = 1; 3377 for (j=0; j<nmask; j++) { 3378 a->j[jcount++] = masked[j]; 3379 mask[masked[j]] = maskcount++; 3380 } 3381 /* set "a" values into matrix */ 3382 ishift = bs2*a->i[i]; 3383 for (j=0; j<bs; j++) { 3384 kmax = rowlengths[i*bs+j]; 3385 for (k=0; k<kmax; k++) { 3386 tmp = jj[nzcountb]/bs ; 3387 block = mask[tmp] - 1; 3388 point = jj[nzcountb] - bs*tmp; 3389 idx = ishift + bs2*block + j + bs*point; 3390 a->a[idx] = (MatScalar)aa[nzcountb++]; 3391 } 3392 } 3393 /* zero out the mask elements we set */ 3394 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3395 } 3396 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3397 3398 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3399 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3400 ierr = PetscFree(aa);CHKERRQ(ierr); 3401 ierr = PetscFree(jj);CHKERRQ(ierr); 3402 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3403 3404 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3405 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3406 ierr = MatView_Private(B);CHKERRQ(ierr); 3407 3408 *A = B; 3409 PetscFunctionReturn(0); 3410 } 3411 3412 #undef __FUNCT__ 3413 #define __FUNCT__ "MatLoadnew_SeqBAIJ" 3414 PetscErrorCode MatLoadnew_SeqBAIJ(PetscViewer viewer,Mat newmat) 3415 { 3416 Mat_SeqBAIJ *a; 3417 PetscErrorCode ierr; 3418 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3419 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3420 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3421 PetscInt *masked,nmask,tmp,bs2,ishift; 3422 PetscMPIInt size; 3423 int fd; 3424 PetscScalar *aa; 3425 MPI_Comm comm = ((PetscObject)viewer)->comm; 3426 3427 PetscFunctionBegin; 3428 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3429 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3430 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3431 bs2 = bs*bs; 3432 3433 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3434 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3435 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3436 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3437 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3438 M = header[1]; N = header[2]; nz = header[3]; 3439 3440 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3441 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3442 3443 /* 3444 This code adds extra rows to make sure the number of rows is 3445 divisible by the blocksize 3446 */ 3447 mbs = M/bs; 3448 extra_rows = bs - M + bs*(mbs); 3449 if (extra_rows == bs) extra_rows = 0; 3450 else mbs++; 3451 if (extra_rows) { 3452 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3453 } 3454 3455 /* Set global sizes if not already set */ 3456 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3457 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3458 } else { /* Check if the matrix global sizes are correct */ 3459 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3460 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3461 } 3462 3463 /* read in row lengths */ 3464 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3465 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3466 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3467 3468 /* read in column indices */ 3469 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3470 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3471 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3472 3473 /* loop over row lengths determining block row lengths */ 3474 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3475 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3476 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3477 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3478 rowcount = 0; 3479 nzcount = 0; 3480 for (i=0; i<mbs; i++) { 3481 nmask = 0; 3482 for (j=0; j<bs; j++) { 3483 kmax = rowlengths[rowcount]; 3484 for (k=0; k<kmax; k++) { 3485 tmp = jj[nzcount++]/bs; 3486 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3487 } 3488 rowcount++; 3489 } 3490 browlengths[i] += nmask; 3491 /* zero out the mask elements we set */ 3492 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3493 } 3494 3495 /* Do preallocation */ 3496 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3497 a = (Mat_SeqBAIJ*)newmat->data; 3498 3499 /* set matrix "i" values */ 3500 a->i[0] = 0; 3501 for (i=1; i<= mbs; i++) { 3502 a->i[i] = a->i[i-1] + browlengths[i-1]; 3503 a->ilen[i-1] = browlengths[i-1]; 3504 } 3505 a->nz = 0; 3506 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3507 3508 /* read in nonzero values */ 3509 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3510 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3511 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3512 3513 /* set "a" and "j" values into matrix */ 3514 nzcount = 0; jcount = 0; 3515 for (i=0; i<mbs; i++) { 3516 nzcountb = nzcount; 3517 nmask = 0; 3518 for (j=0; j<bs; j++) { 3519 kmax = rowlengths[i*bs+j]; 3520 for (k=0; k<kmax; k++) { 3521 tmp = jj[nzcount++]/bs; 3522 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3523 } 3524 } 3525 /* sort the masked values */ 3526 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3527 3528 /* set "j" values into matrix */ 3529 maskcount = 1; 3530 for (j=0; j<nmask; j++) { 3531 a->j[jcount++] = masked[j]; 3532 mask[masked[j]] = maskcount++; 3533 } 3534 /* set "a" values into matrix */ 3535 ishift = bs2*a->i[i]; 3536 for (j=0; j<bs; j++) { 3537 kmax = rowlengths[i*bs+j]; 3538 for (k=0; k<kmax; k++) { 3539 tmp = jj[nzcountb]/bs ; 3540 block = mask[tmp] - 1; 3541 point = jj[nzcountb] - bs*tmp; 3542 idx = ishift + bs2*block + j + bs*point; 3543 a->a[idx] = (MatScalar)aa[nzcountb++]; 3544 } 3545 } 3546 /* zero out the mask elements we set */ 3547 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3548 } 3549 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3550 3551 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3552 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3553 ierr = PetscFree(aa);CHKERRQ(ierr); 3554 ierr = PetscFree(jj);CHKERRQ(ierr); 3555 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3556 3557 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3558 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3559 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3560 PetscFunctionReturn(0); 3561 } 3562 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "MatCreateSeqBAIJ" 3565 /*@C 3566 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3567 compressed row) format. For good matrix assembly performance the 3568 user should preallocate the matrix storage by setting the parameter nz 3569 (or the array nnz). By setting these parameters accurately, performance 3570 during matrix assembly can be increased by more than a factor of 50. 3571 3572 Collective on MPI_Comm 3573 3574 Input Parameters: 3575 + comm - MPI communicator, set to PETSC_COMM_SELF 3576 . bs - size of block 3577 . m - number of rows 3578 . n - number of columns 3579 . nz - number of nonzero blocks per block row (same for all rows) 3580 - nnz - array containing the number of nonzero blocks in the various block rows 3581 (possibly different for each block row) or PETSC_NULL 3582 3583 Output Parameter: 3584 . A - the matrix 3585 3586 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3587 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3588 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3589 3590 Options Database Keys: 3591 . -mat_no_unroll - uses code that does not unroll the loops in the 3592 block calculations (much slower) 3593 . -mat_block_size - size of the blocks to use 3594 3595 Level: intermediate 3596 3597 Notes: 3598 The number of rows and columns must be divisible by blocksize. 3599 3600 If the nnz parameter is given then the nz parameter is ignored 3601 3602 A nonzero block is any block that as 1 or more nonzeros in it 3603 3604 The block AIJ format is fully compatible with standard Fortran 77 3605 storage. That is, the stored row and column indices can begin at 3606 either one (as in Fortran) or zero. See the users' manual for details. 3607 3608 Specify the preallocated storage with either nz or nnz (not both). 3609 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3610 allocation. For additional details, see the users manual chapter on 3611 matrices. 3612 3613 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3614 @*/ 3615 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3616 { 3617 PetscErrorCode ierr; 3618 3619 PetscFunctionBegin; 3620 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3621 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3622 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3623 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3624 PetscFunctionReturn(0); 3625 } 3626 3627 #undef __FUNCT__ 3628 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3629 /*@C 3630 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3631 per row in the matrix. For good matrix assembly performance the 3632 user should preallocate the matrix storage by setting the parameter nz 3633 (or the array nnz). By setting these parameters accurately, performance 3634 during matrix assembly can be increased by more than a factor of 50. 3635 3636 Collective on MPI_Comm 3637 3638 Input Parameters: 3639 + A - the matrix 3640 . bs - size of block 3641 . nz - number of block nonzeros per block row (same for all rows) 3642 - nnz - array containing the number of block nonzeros in the various block rows 3643 (possibly different for each block row) or PETSC_NULL 3644 3645 Options Database Keys: 3646 . -mat_no_unroll - uses code that does not unroll the loops in the 3647 block calculations (much slower) 3648 . -mat_block_size - size of the blocks to use 3649 3650 Level: intermediate 3651 3652 Notes: 3653 If the nnz parameter is given then the nz parameter is ignored 3654 3655 You can call MatGetInfo() to get information on how effective the preallocation was; 3656 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3657 You can also run with the option -info and look for messages with the string 3658 malloc in them to see if additional memory allocation was needed. 3659 3660 The block AIJ format is fully compatible with standard Fortran 77 3661 storage. That is, the stored row and column indices can begin at 3662 either one (as in Fortran) or zero. See the users' manual for details. 3663 3664 Specify the preallocated storage with either nz or nnz (not both). 3665 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3666 allocation. For additional details, see the users manual chapter on 3667 matrices. 3668 3669 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3670 @*/ 3671 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3672 { 3673 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3674 3675 PetscFunctionBegin; 3676 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3677 if (f) { 3678 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3679 } 3680 PetscFunctionReturn(0); 3681 } 3682 3683 #undef __FUNCT__ 3684 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3685 /*@C 3686 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3687 (the default sequential PETSc format). 3688 3689 Collective on MPI_Comm 3690 3691 Input Parameters: 3692 + A - the matrix 3693 . i - the indices into j for the start of each local row (starts with zero) 3694 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3695 - v - optional values in the matrix 3696 3697 Level: developer 3698 3699 .keywords: matrix, aij, compressed row, sparse 3700 3701 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3702 @*/ 3703 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3704 { 3705 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3706 3707 PetscFunctionBegin; 3708 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3709 if (f) { 3710 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3711 } 3712 PetscFunctionReturn(0); 3713 } 3714 3715 3716 #undef __FUNCT__ 3717 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3718 /*@ 3719 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3720 (upper triangular entries in CSR format) provided by the user. 3721 3722 Collective on MPI_Comm 3723 3724 Input Parameters: 3725 + comm - must be an MPI communicator of size 1 3726 . bs - size of block 3727 . m - number of rows 3728 . n - number of columns 3729 . i - row indices 3730 . j - column indices 3731 - a - matrix values 3732 3733 Output Parameter: 3734 . mat - the matrix 3735 3736 Level: intermediate 3737 3738 Notes: 3739 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3740 once the matrix is destroyed 3741 3742 You cannot set new nonzero locations into this matrix, that will generate an error. 3743 3744 The i and j indices are 0 based 3745 3746 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3747 3748 @*/ 3749 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3750 { 3751 PetscErrorCode ierr; 3752 PetscInt ii; 3753 Mat_SeqBAIJ *baij; 3754 3755 PetscFunctionBegin; 3756 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3757 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3758 3759 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3760 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3761 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3762 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3763 baij = (Mat_SeqBAIJ*)(*mat)->data; 3764 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3765 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3766 3767 baij->i = i; 3768 baij->j = j; 3769 baij->a = a; 3770 baij->singlemalloc = PETSC_FALSE; 3771 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3772 baij->free_a = PETSC_FALSE; 3773 baij->free_ij = PETSC_FALSE; 3774 3775 for (ii=0; ii<m; ii++) { 3776 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3777 #if defined(PETSC_USE_DEBUG) 3778 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3779 #endif 3780 } 3781 #if defined(PETSC_USE_DEBUG) 3782 for (ii=0; ii<baij->i[m]; ii++) { 3783 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3784 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3785 } 3786 #endif 3787 3788 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3789 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3790 PetscFunctionReturn(0); 3791 } 3792