1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for direct calls from Fortran (Used in PETSc-fun3d) 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval,lastcol = -1; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 590 stepval = (n-1)*4; 591 for (k=0; k<m; k++) { /* loop over added rows */ 592 row = im[k]; 593 rp = aj + ai[row]; 594 ap = aa + 16*ai[row]; 595 nrow = ailen[row]; 596 low = 0; 597 high = nrow; 598 for (l=0; l<n; l++) { /* loop over added columns */ 599 col = in[l]; 600 if (col <= lastcol) low = 0; else high = nrow; 601 lastcol = col; 602 value = v + k*(stepval+4 + l)*4; 603 while (high-low > 7) { 604 t = (low+high)/2; 605 if (rp[t] > col) high = t; 606 else low = t; 607 } 608 for (i=low; i<high; i++) { 609 if (rp[i] > col) break; 610 if (rp[i] == col) { 611 bap = ap + 16*i; 612 for (ii=0; ii<4; ii++,value+=stepval) { 613 for (jj=ii; jj<16; jj+=4) { 614 bap[jj] += *value++; 615 } 616 } 617 goto noinsert2; 618 } 619 } 620 N = nrow++ - 1; 621 high++; /* added new column index thus must search to one higher than before */ 622 /* shift up all the later entries in this row */ 623 for (ii=N; ii>=i; ii--) { 624 rp[ii+1] = rp[ii]; 625 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 626 } 627 if (N >= i) { 628 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 629 } 630 rp[i] = col; 631 bap = ap + 16*i; 632 for (ii=0; ii<4; ii++,value+=stepval) { 633 for (jj=ii; jj<16; jj+=4) { 634 bap[jj] = *value++; 635 } 636 } 637 noinsert2:; 638 low = i; 639 } 640 ailen[row] = nrow; 641 } 642 PetscFunctionReturnVoid(); 643 } 644 EXTERN_C_END 645 646 #if defined(PETSC_HAVE_FORTRAN_CAPS) 647 #define matsetvalues4_ MATSETVALUES4 648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 649 #define matsetvalues4_ matsetvalues4 650 #endif 651 652 EXTERN_C_BEGIN 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatSetValues4_" 655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 656 { 657 Mat A = *AA; 658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 659 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 660 PetscInt *ai=a->i,*ailen=a->ilen; 661 PetscInt *aj=a->j,brow,bcol; 662 PetscInt ridx,cidx,lastcol = -1; 663 MatScalar *ap,value,*aa=a->a,*bap; 664 665 PetscFunctionBegin; 666 for (k=0; k<m; k++) { /* loop over added rows */ 667 row = im[k]; brow = row/4; 668 rp = aj + ai[brow]; 669 ap = aa + 16*ai[brow]; 670 nrow = ailen[brow]; 671 low = 0; 672 high = nrow; 673 for (l=0; l<n; l++) { /* loop over added columns */ 674 col = in[l]; bcol = col/4; 675 ridx = row % 4; cidx = col % 4; 676 value = v[l + k*n]; 677 if (col <= lastcol) low = 0; else high = nrow; 678 lastcol = col; 679 while (high-low > 7) { 680 t = (low+high)/2; 681 if (rp[t] > bcol) high = t; 682 else low = t; 683 } 684 for (i=low; i<high; i++) { 685 if (rp[i] > bcol) break; 686 if (rp[i] == bcol) { 687 bap = ap + 16*i + 4*cidx + ridx; 688 *bap += value; 689 goto noinsert1; 690 } 691 } 692 N = nrow++ - 1; 693 high++; /* added new column thus must search to one higher than before */ 694 /* shift up all the later entries in this row */ 695 for (ii=N; ii>=i; ii--) { 696 rp[ii+1] = rp[ii]; 697 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 698 } 699 if (N>=i) { 700 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 701 } 702 rp[i] = bcol; 703 ap[16*i + 4*cidx + ridx] = value; 704 noinsert1:; 705 low = i; 706 } 707 ailen[brow] = nrow; 708 } 709 PetscFunctionReturnVoid(); 710 } 711 EXTERN_C_END 712 713 /* UGLY, ugly, ugly 714 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 715 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 716 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 717 converts the entries into single precision and then calls ..._MatScalar() to put them 718 into the single precision data structures. 719 */ 720 #if defined(PETSC_USE_MAT_SINGLE) 721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 722 #else 723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 724 #endif 725 726 #define CHUNKSIZE 10 727 728 /* 729 Checks for missing diagonals 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 PetscErrorCode ierr; 737 PetscInt *diag,*jj = a->j,i; 738 739 PetscFunctionBegin; 740 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 741 diag = a->diag; 742 for (i=0; i<a->mbs; i++) { 743 if (jj[diag[i]] != i) { 744 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 745 } 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 753 { 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 755 PetscErrorCode ierr; 756 PetscInt i,j,*diag,m = a->mbs; 757 758 PetscFunctionBegin; 759 if (a->diag) PetscFunctionReturn(0); 760 761 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 762 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 763 for (i=0; i<m; i++) { 764 diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 diag[i] = j; 768 break; 769 } 770 } 771 } 772 a->diag = diag; 773 PetscFunctionReturn(0); 774 } 775 776 777 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 781 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 782 { 783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 784 PetscErrorCode ierr; 785 PetscInt n = a->mbs,i; 786 787 PetscFunctionBegin; 788 *nn = n; 789 if (!ia) PetscFunctionReturn(0); 790 if (symmetric) { 791 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 792 } else if (oshift == 1) { 793 /* temporarily add 1 to i and j indices */ 794 PetscInt nz = a->i[n]; 795 for (i=0; i<nz; i++) a->j[i]++; 796 for (i=0; i<n+1; i++) a->i[i]++; 797 *ia = a->i; *ja = a->j; 798 } else { 799 *ia = a->i; *ja = a->j; 800 } 801 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 807 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 810 PetscErrorCode ierr; 811 PetscInt i,n = a->mbs; 812 813 PetscFunctionBegin; 814 if (!ia) PetscFunctionReturn(0); 815 if (symmetric) { 816 ierr = PetscFree(*ia);CHKERRQ(ierr); 817 ierr = PetscFree(*ja);CHKERRQ(ierr); 818 } else if (oshift == 1) { 819 PetscInt nz = a->i[n]-1; 820 for (i=0; i<nz; i++) a->j[i]--; 821 for (i=0; i<n+1; i++) a->i[i]--; 822 } 823 PetscFunctionReturn(0); 824 } 825 826 #undef __FUNCT__ 827 #define __FUNCT__ "MatDestroy_SeqBAIJ" 828 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 829 { 830 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 831 PetscErrorCode ierr; 832 833 PetscFunctionBegin; 834 #if defined(PETSC_USE_LOG) 835 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 836 #endif 837 if (a->freedata){ 838 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 839 } 840 if (a->row) { 841 ierr = ISDestroy(a->row);CHKERRQ(ierr); 842 } 843 if (a->col) { 844 ierr = ISDestroy(a->col);CHKERRQ(ierr); 845 } 846 ierr = PetscFree(a->diag);CHKERRQ(ierr); 847 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 848 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 849 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 850 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 851 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 852 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 853 #if defined(PETSC_USE_MAT_SINGLE) 854 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 855 #endif 856 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 857 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 858 859 ierr = PetscFree(a);CHKERRQ(ierr); 860 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 866 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 867 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "MatSetOption_SeqBAIJ" 873 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 874 { 875 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 876 PetscErrorCode ierr; 877 878 PetscFunctionBegin; 879 switch (op) { 880 case MAT_ROW_ORIENTED: 881 a->roworiented = PETSC_TRUE; 882 break; 883 case MAT_COLUMN_ORIENTED: 884 a->roworiented = PETSC_FALSE; 885 break; 886 case MAT_COLUMNS_SORTED: 887 a->sorted = PETSC_TRUE; 888 break; 889 case MAT_COLUMNS_UNSORTED: 890 a->sorted = PETSC_FALSE; 891 break; 892 case MAT_KEEP_ZEROED_ROWS: 893 a->keepzeroedrows = PETSC_TRUE; 894 break; 895 case MAT_NO_NEW_NONZERO_LOCATIONS: 896 a->nonew = 1; 897 break; 898 case MAT_NEW_NONZERO_LOCATION_ERR: 899 a->nonew = -1; 900 break; 901 case MAT_NEW_NONZERO_ALLOCATION_ERR: 902 a->nonew = -2; 903 break; 904 case MAT_YES_NEW_NONZERO_LOCATIONS: 905 a->nonew = 0; 906 break; 907 case MAT_ROWS_SORTED: 908 case MAT_ROWS_UNSORTED: 909 case MAT_YES_NEW_DIAGONALS: 910 case MAT_IGNORE_OFF_PROC_ENTRIES: 911 case MAT_USE_HASH_TABLE: 912 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 913 break; 914 case MAT_NO_NEW_DIAGONALS: 915 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 916 case MAT_SYMMETRIC: 917 case MAT_STRUCTURALLY_SYMMETRIC: 918 case MAT_NOT_SYMMETRIC: 919 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 920 case MAT_HERMITIAN: 921 case MAT_NOT_HERMITIAN: 922 case MAT_SYMMETRY_ETERNAL: 923 case MAT_NOT_SYMMETRY_ETERNAL: 924 break; 925 default: 926 SETERRQ(PETSC_ERR_SUP,"unknown option"); 927 } 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatGetRow_SeqBAIJ" 933 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 934 { 935 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 938 MatScalar *aa,*aa_i; 939 PetscScalar *v_i; 940 941 PetscFunctionBegin; 942 bs = A->rmap.bs; 943 ai = a->i; 944 aj = a->j; 945 aa = a->a; 946 bs2 = a->bs2; 947 948 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 949 950 bn = row/bs; /* Block number */ 951 bp = row % bs; /* Block Position */ 952 M = ai[bn+1] - ai[bn]; 953 *nz = bs*M; 954 955 if (v) { 956 *v = 0; 957 if (*nz) { 958 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 959 for (i=0; i<M; i++) { /* for each block in the block row */ 960 v_i = *v + i*bs; 961 aa_i = aa + bs2*(ai[bn] + i); 962 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 963 } 964 } 965 } 966 967 if (idx) { 968 *idx = 0; 969 if (*nz) { 970 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 971 for (i=0; i<M; i++) { /* for each block in the block row */ 972 idx_i = *idx + i*bs; 973 itmp = bs*aj[ai[bn] + i]; 974 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 975 } 976 } 977 } 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 983 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 984 { 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 989 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatTranspose_SeqBAIJ" 995 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 996 { 997 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 998 Mat C; 999 PetscErrorCode ierr; 1000 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1001 PetscInt *rows,*cols,bs2=a->bs2; 1002 PetscScalar *array; 1003 1004 PetscFunctionBegin; 1005 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1006 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1007 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1008 1009 #if defined(PETSC_USE_MAT_SINGLE) 1010 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1011 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1012 #else 1013 array = a->a; 1014 #endif 1015 1016 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1017 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1018 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1019 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1020 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1021 ierr = PetscFree(col);CHKERRQ(ierr); 1022 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1023 cols = rows + bs; 1024 for (i=0; i<mbs; i++) { 1025 cols[0] = i*bs; 1026 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1027 len = ai[i+1] - ai[i]; 1028 for (j=0; j<len; j++) { 1029 rows[0] = (*aj++)*bs; 1030 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1031 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1032 array += bs2; 1033 } 1034 } 1035 ierr = PetscFree(rows);CHKERRQ(ierr); 1036 #if defined(PETSC_USE_MAT_SINGLE) 1037 ierr = PetscFree(array); 1038 #endif 1039 1040 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042 1043 if (B) { 1044 *B = C; 1045 } else { 1046 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1053 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1054 { 1055 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1058 int fd; 1059 PetscScalar *aa; 1060 FILE *file; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1064 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1065 col_lens[0] = MAT_FILE_COOKIE; 1066 1067 col_lens[1] = A->rmap.N; 1068 col_lens[2] = A->cmap.n; 1069 col_lens[3] = a->nz*bs2; 1070 1071 /* store lengths of each row and write (including header) to file */ 1072 count = 0; 1073 for (i=0; i<a->mbs; i++) { 1074 for (j=0; j<bs; j++) { 1075 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1076 } 1077 } 1078 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1079 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1080 1081 /* store column indices (zero start index) */ 1082 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1083 count = 0; 1084 for (i=0; i<a->mbs; i++) { 1085 for (j=0; j<bs; j++) { 1086 for (k=a->i[i]; k<a->i[i+1]; k++) { 1087 for (l=0; l<bs; l++) { 1088 jj[count++] = bs*a->j[k] + l; 1089 } 1090 } 1091 } 1092 } 1093 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1094 ierr = PetscFree(jj);CHKERRQ(ierr); 1095 1096 /* store nonzero values */ 1097 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1098 count = 0; 1099 for (i=0; i<a->mbs; i++) { 1100 for (j=0; j<bs; j++) { 1101 for (k=a->i[i]; k<a->i[i+1]; k++) { 1102 for (l=0; l<bs; l++) { 1103 aa[count++] = a->a[bs2*k + l*bs + j]; 1104 } 1105 } 1106 } 1107 } 1108 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1109 ierr = PetscFree(aa);CHKERRQ(ierr); 1110 1111 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1112 if (file) { 1113 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1120 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1121 { 1122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1125 PetscViewerFormat format; 1126 1127 PetscFunctionBegin; 1128 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1129 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1130 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1131 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1132 Mat aij; 1133 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1134 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1135 ierr = MatDestroy(aij);CHKERRQ(ierr); 1136 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1137 PetscFunctionReturn(0); 1138 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1139 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1140 for (i=0; i<a->mbs; i++) { 1141 for (j=0; j<bs; j++) { 1142 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1143 for (k=a->i[i]; k<a->i[i+1]; k++) { 1144 for (l=0; l<bs; l++) { 1145 #if defined(PETSC_USE_COMPLEX) 1146 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1147 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1148 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1150 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1151 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1152 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1153 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1154 } 1155 #else 1156 if (a->a[bs2*k + l*bs + j] != 0.0) { 1157 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1158 } 1159 #endif 1160 } 1161 } 1162 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1163 } 1164 } 1165 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1166 } else { 1167 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1168 for (i=0; i<a->mbs; i++) { 1169 for (j=0; j<bs; j++) { 1170 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1171 for (k=a->i[i]; k<a->i[i+1]; k++) { 1172 for (l=0; l<bs; l++) { 1173 #if defined(PETSC_USE_COMPLEX) 1174 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1176 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1177 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1178 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1179 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1180 } else { 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1182 } 1183 #else 1184 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1185 #endif 1186 } 1187 } 1188 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1189 } 1190 } 1191 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1192 } 1193 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1199 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1200 { 1201 Mat A = (Mat) Aa; 1202 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1205 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1206 MatScalar *aa; 1207 PetscViewer viewer; 1208 1209 PetscFunctionBegin; 1210 1211 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1212 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1213 1214 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1215 1216 /* loop over matrix elements drawing boxes */ 1217 color = PETSC_DRAW_BLUE; 1218 for (i=0,row=0; i<mbs; i++,row+=bs) { 1219 for (j=a->i[i]; j<a->i[i+1]; j++) { 1220 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1221 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1222 aa = a->a + j*bs2; 1223 for (k=0; k<bs; k++) { 1224 for (l=0; l<bs; l++) { 1225 if (PetscRealPart(*aa++) >= 0.) continue; 1226 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1227 } 1228 } 1229 } 1230 } 1231 color = PETSC_DRAW_CYAN; 1232 for (i=0,row=0; i<mbs; i++,row+=bs) { 1233 for (j=a->i[i]; j<a->i[i+1]; j++) { 1234 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1235 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1236 aa = a->a + j*bs2; 1237 for (k=0; k<bs; k++) { 1238 for (l=0; l<bs; l++) { 1239 if (PetscRealPart(*aa++) != 0.) continue; 1240 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1241 } 1242 } 1243 } 1244 } 1245 1246 color = PETSC_DRAW_RED; 1247 for (i=0,row=0; i<mbs; i++,row+=bs) { 1248 for (j=a->i[i]; j<a->i[i+1]; j++) { 1249 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1250 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1251 aa = a->a + j*bs2; 1252 for (k=0; k<bs; k++) { 1253 for (l=0; l<bs; l++) { 1254 if (PetscRealPart(*aa++) <= 0.) continue; 1255 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1256 } 1257 } 1258 } 1259 } 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1265 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1266 { 1267 PetscErrorCode ierr; 1268 PetscReal xl,yl,xr,yr,w,h; 1269 PetscDraw draw; 1270 PetscTruth isnull; 1271 1272 PetscFunctionBegin; 1273 1274 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1275 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1276 1277 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1278 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1279 xr += w; yr += h; xl = -w; yl = -h; 1280 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1281 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1282 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatView_SeqBAIJ" 1288 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1289 { 1290 PetscErrorCode ierr; 1291 PetscTruth iascii,isbinary,isdraw; 1292 1293 PetscFunctionBegin; 1294 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1295 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1296 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1297 if (iascii){ 1298 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1299 } else if (isbinary) { 1300 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1301 } else if (isdraw) { 1302 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1303 } else { 1304 Mat B; 1305 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1306 ierr = MatView(B,viewer);CHKERRQ(ierr); 1307 ierr = MatDestroy(B);CHKERRQ(ierr); 1308 } 1309 PetscFunctionReturn(0); 1310 } 1311 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1315 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1316 { 1317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1318 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1319 PetscInt *ai = a->i,*ailen = a->ilen; 1320 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1321 MatScalar *ap,*aa = a->a,zero = 0.0; 1322 1323 PetscFunctionBegin; 1324 for (k=0; k<m; k++) { /* loop over rows */ 1325 row = im[k]; brow = row/bs; 1326 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1327 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1328 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1329 nrow = ailen[brow]; 1330 for (l=0; l<n; l++) { /* loop over columns */ 1331 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1332 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1333 col = in[l] ; 1334 bcol = col/bs; 1335 cidx = col%bs; 1336 ridx = row%bs; 1337 high = nrow; 1338 low = 0; /* assume unsorted */ 1339 while (high-low > 5) { 1340 t = (low+high)/2; 1341 if (rp[t] > bcol) high = t; 1342 else low = t; 1343 } 1344 for (i=low; i<high; i++) { 1345 if (rp[i] > bcol) break; 1346 if (rp[i] == bcol) { 1347 *v++ = ap[bs2*i+bs*cidx+ridx]; 1348 goto finished; 1349 } 1350 } 1351 *v++ = zero; 1352 finished:; 1353 } 1354 } 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #if defined(PETSC_USE_MAT_SINGLE) 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1361 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1362 { 1363 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1364 PetscErrorCode ierr; 1365 PetscInt i,N = m*n*b->bs2; 1366 MatScalar *vsingle; 1367 1368 PetscFunctionBegin; 1369 if (N > b->setvalueslen) { 1370 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1371 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1372 b->setvalueslen = N; 1373 } 1374 vsingle = b->setvaluescopy; 1375 for (i=0; i<N; i++) { 1376 vsingle[i] = v[i]; 1377 } 1378 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 #endif 1382 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1386 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1387 { 1388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1389 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1390 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1391 PetscErrorCode ierr; 1392 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1393 PetscTruth roworiented=a->roworiented; 1394 const MatScalar *value = v; 1395 MatScalar *ap,*aa = a->a,*bap; 1396 1397 PetscFunctionBegin; 1398 if (roworiented) { 1399 stepval = (n-1)*bs; 1400 } else { 1401 stepval = (m-1)*bs; 1402 } 1403 for (k=0; k<m; k++) { /* loop over added rows */ 1404 row = im[k]; 1405 if (row < 0) continue; 1406 #if defined(PETSC_USE_DEBUG) 1407 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1408 #endif 1409 rp = aj + ai[row]; 1410 ap = aa + bs2*ai[row]; 1411 rmax = imax[row]; 1412 nrow = ailen[row]; 1413 low = 0; 1414 high = nrow; 1415 for (l=0; l<n; l++) { /* loop over added columns */ 1416 if (in[l] < 0) continue; 1417 #if defined(PETSC_USE_DEBUG) 1418 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1419 #endif 1420 col = in[l]; 1421 if (roworiented) { 1422 value = v + k*(stepval+bs)*bs + l*bs; 1423 } else { 1424 value = v + l*(stepval+bs)*bs + k*bs; 1425 } 1426 if (col <= lastcol) low = 0; else high = nrow; 1427 lastcol = col; 1428 while (high-low > 7) { 1429 t = (low+high)/2; 1430 if (rp[t] > col) high = t; 1431 else low = t; 1432 } 1433 for (i=low; i<high; i++) { 1434 if (rp[i] > col) break; 1435 if (rp[i] == col) { 1436 bap = ap + bs2*i; 1437 if (roworiented) { 1438 if (is == ADD_VALUES) { 1439 for (ii=0; ii<bs; ii++,value+=stepval) { 1440 for (jj=ii; jj<bs2; jj+=bs) { 1441 bap[jj] += *value++; 1442 } 1443 } 1444 } else { 1445 for (ii=0; ii<bs; ii++,value+=stepval) { 1446 for (jj=ii; jj<bs2; jj+=bs) { 1447 bap[jj] = *value++; 1448 } 1449 } 1450 } 1451 } else { 1452 if (is == ADD_VALUES) { 1453 for (ii=0; ii<bs; ii++,value+=stepval) { 1454 for (jj=0; jj<bs; jj++) { 1455 *bap++ += *value++; 1456 } 1457 } 1458 } else { 1459 for (ii=0; ii<bs; ii++,value+=stepval) { 1460 for (jj=0; jj<bs; jj++) { 1461 *bap++ = *value++; 1462 } 1463 } 1464 } 1465 } 1466 goto noinsert2; 1467 } 1468 } 1469 if (nonew == 1) goto noinsert2; 1470 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1471 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1472 N = nrow++ - 1; high++; 1473 /* shift up all the later entries in this row */ 1474 for (ii=N; ii>=i; ii--) { 1475 rp[ii+1] = rp[ii]; 1476 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1477 } 1478 if (N >= i) { 1479 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1480 } 1481 rp[i] = col; 1482 bap = ap + bs2*i; 1483 if (roworiented) { 1484 for (ii=0; ii<bs; ii++,value+=stepval) { 1485 for (jj=ii; jj<bs2; jj+=bs) { 1486 bap[jj] = *value++; 1487 } 1488 } 1489 } else { 1490 for (ii=0; ii<bs; ii++,value+=stepval) { 1491 for (jj=0; jj<bs; jj++) { 1492 *bap++ = *value++; 1493 } 1494 } 1495 } 1496 noinsert2:; 1497 low = i; 1498 } 1499 ailen[row] = nrow; 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1506 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1507 { 1508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1509 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1510 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1511 PetscErrorCode ierr; 1512 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1513 MatScalar *aa = a->a,*ap; 1514 PetscReal ratio=0.6; 1515 1516 PetscFunctionBegin; 1517 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1518 1519 if (m) rmax = ailen[0]; 1520 for (i=1; i<mbs; i++) { 1521 /* move each row back by the amount of empty slots (fshift) before it*/ 1522 fshift += imax[i-1] - ailen[i-1]; 1523 rmax = PetscMax(rmax,ailen[i]); 1524 if (fshift) { 1525 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1526 N = ailen[i]; 1527 for (j=0; j<N; j++) { 1528 ip[j-fshift] = ip[j]; 1529 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1530 } 1531 } 1532 ai[i] = ai[i-1] + ailen[i-1]; 1533 } 1534 if (mbs) { 1535 fshift += imax[mbs-1] - ailen[mbs-1]; 1536 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1537 } 1538 /* reset ilen and imax for each row */ 1539 for (i=0; i<mbs; i++) { 1540 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1541 } 1542 a->nz = ai[mbs]; 1543 1544 /* diagonals may have moved, so kill the diagonal pointers */ 1545 a->idiagvalid = PETSC_FALSE; 1546 if (fshift && a->diag) { 1547 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1548 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1549 a->diag = 0; 1550 } 1551 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1552 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1553 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1554 a->reallocs = 0; 1555 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1556 1557 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1558 if (a->compressedrow.use){ 1559 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1560 } 1561 1562 A->same_nonzero = PETSC_TRUE; 1563 PetscFunctionReturn(0); 1564 } 1565 1566 /* 1567 This function returns an array of flags which indicate the locations of contiguous 1568 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1569 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1570 Assume: sizes should be long enough to hold all the values. 1571 */ 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1574 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1575 { 1576 PetscInt i,j,k,row; 1577 PetscTruth flg; 1578 1579 PetscFunctionBegin; 1580 for (i=0,j=0; i<n; j++) { 1581 row = idx[i]; 1582 if (row%bs!=0) { /* Not the begining of a block */ 1583 sizes[j] = 1; 1584 i++; 1585 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1586 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1587 i++; 1588 } else { /* Begining of the block, so check if the complete block exists */ 1589 flg = PETSC_TRUE; 1590 for (k=1; k<bs; k++) { 1591 if (row+k != idx[i+k]) { /* break in the block */ 1592 flg = PETSC_FALSE; 1593 break; 1594 } 1595 } 1596 if (flg) { /* No break in the bs */ 1597 sizes[j] = bs; 1598 i+= bs; 1599 } else { 1600 sizes[j] = 1; 1601 i++; 1602 } 1603 } 1604 } 1605 *bs_max = j; 1606 PetscFunctionReturn(0); 1607 } 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1611 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1612 { 1613 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1614 PetscErrorCode ierr; 1615 PetscInt i,j,k,count,*rows; 1616 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1617 PetscScalar zero = 0.0; 1618 MatScalar *aa; 1619 1620 PetscFunctionBegin; 1621 /* Make a copy of the IS and sort it */ 1622 /* allocate memory for rows,sizes */ 1623 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1624 sizes = rows + is_n; 1625 1626 /* copy IS values to rows, and sort them */ 1627 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1628 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1629 if (baij->keepzeroedrows) { 1630 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1631 bs_max = is_n; 1632 A->same_nonzero = PETSC_TRUE; 1633 } else { 1634 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1635 A->same_nonzero = PETSC_FALSE; 1636 } 1637 1638 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1639 row = rows[j]; 1640 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1641 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1642 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1643 if (sizes[i] == bs && !baij->keepzeroedrows) { 1644 if (diag != 0.0) { 1645 if (baij->ilen[row/bs] > 0) { 1646 baij->ilen[row/bs] = 1; 1647 baij->j[baij->i[row/bs]] = row/bs; 1648 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1649 } 1650 /* Now insert all the diagonal values for this bs */ 1651 for (k=0; k<bs; k++) { 1652 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1653 } 1654 } else { /* (diag == 0.0) */ 1655 baij->ilen[row/bs] = 0; 1656 } /* end (diag == 0.0) */ 1657 } else { /* (sizes[i] != bs) */ 1658 #if defined (PETSC_USE_DEBUG) 1659 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1660 #endif 1661 for (k=0; k<count; k++) { 1662 aa[0] = zero; 1663 aa += bs; 1664 } 1665 if (diag != 0.0) { 1666 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 1671 ierr = PetscFree(rows);CHKERRQ(ierr); 1672 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1678 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1679 { 1680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1681 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1682 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1683 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1684 PetscErrorCode ierr; 1685 PetscInt ridx,cidx,bs2=a->bs2; 1686 PetscTruth roworiented=a->roworiented; 1687 MatScalar *ap,value,*aa=a->a,*bap; 1688 1689 PetscFunctionBegin; 1690 for (k=0; k<m; k++) { /* loop over added rows */ 1691 row = im[k]; 1692 brow = row/bs; 1693 if (row < 0) continue; 1694 #if defined(PETSC_USE_DEBUG) 1695 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1696 #endif 1697 rp = aj + ai[brow]; 1698 ap = aa + bs2*ai[brow]; 1699 rmax = imax[brow]; 1700 nrow = ailen[brow]; 1701 low = 0; 1702 high = nrow; 1703 for (l=0; l<n; l++) { /* loop over added columns */ 1704 if (in[l] < 0) continue; 1705 #if defined(PETSC_USE_DEBUG) 1706 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1707 #endif 1708 col = in[l]; bcol = col/bs; 1709 ridx = row % bs; cidx = col % bs; 1710 if (roworiented) { 1711 value = v[l + k*n]; 1712 } else { 1713 value = v[k + l*m]; 1714 } 1715 if (col <= lastcol) low = 0; else high = nrow; 1716 lastcol = col; 1717 while (high-low > 7) { 1718 t = (low+high)/2; 1719 if (rp[t] > bcol) high = t; 1720 else low = t; 1721 } 1722 for (i=low; i<high; i++) { 1723 if (rp[i] > bcol) break; 1724 if (rp[i] == bcol) { 1725 bap = ap + bs2*i + bs*cidx + ridx; 1726 if (is == ADD_VALUES) *bap += value; 1727 else *bap = value; 1728 goto noinsert1; 1729 } 1730 } 1731 if (nonew == 1) goto noinsert1; 1732 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1733 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1734 N = nrow++ - 1; high++; 1735 /* shift up all the later entries in this row */ 1736 for (ii=N; ii>=i; ii--) { 1737 rp[ii+1] = rp[ii]; 1738 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1739 } 1740 if (N>=i) { 1741 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1742 } 1743 rp[i] = bcol; 1744 ap[bs2*i + bs*cidx + ridx] = value; 1745 a->nz++; 1746 noinsert1:; 1747 low = i; 1748 } 1749 ailen[brow] = nrow; 1750 } 1751 A->same_nonzero = PETSC_FALSE; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1758 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1759 { 1760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1761 Mat outA; 1762 PetscErrorCode ierr; 1763 PetscTruth row_identity,col_identity; 1764 1765 PetscFunctionBegin; 1766 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1767 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1768 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1769 if (!row_identity || !col_identity) { 1770 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1771 } 1772 1773 outA = inA; 1774 inA->factor = FACTOR_LU; 1775 1776 if (!a->diag) { 1777 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1778 } 1779 1780 a->row = row; 1781 a->col = col; 1782 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1783 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1784 1785 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1786 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1787 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1788 1789 /* 1790 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1791 for ILU(0) factorization with natural ordering 1792 */ 1793 if (inA->rmap.bs < 8) { 1794 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1795 } else { 1796 if (!a->solve_work) { 1797 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1798 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1799 } 1800 } 1801 1802 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1803 1804 PetscFunctionReturn(0); 1805 } 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1808 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1809 { 1810 static PetscTruth called = PETSC_FALSE; 1811 MPI_Comm comm = A->comm; 1812 PetscErrorCode ierr; 1813 1814 PetscFunctionBegin; 1815 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1816 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1817 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 EXTERN_C_BEGIN 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1824 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1825 { 1826 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1827 PetscInt i,nz,nbs; 1828 1829 PetscFunctionBegin; 1830 nz = baij->maxnz/baij->bs2; 1831 nbs = baij->nbs; 1832 for (i=0; i<nz; i++) { 1833 baij->j[i] = indices[i]; 1834 } 1835 baij->nz = nz; 1836 for (i=0; i<nbs; i++) { 1837 baij->ilen[i] = baij->imax[i]; 1838 } 1839 1840 PetscFunctionReturn(0); 1841 } 1842 EXTERN_C_END 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1846 /*@ 1847 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1848 in the matrix. 1849 1850 Input Parameters: 1851 + mat - the SeqBAIJ matrix 1852 - indices - the column indices 1853 1854 Level: advanced 1855 1856 Notes: 1857 This can be called if you have precomputed the nonzero structure of the 1858 matrix and want to provide it to the matrix object to improve the performance 1859 of the MatSetValues() operation. 1860 1861 You MUST have set the correct numbers of nonzeros per row in the call to 1862 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1863 1864 MUST be called before any calls to MatSetValues(); 1865 1866 @*/ 1867 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1868 { 1869 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1870 1871 PetscFunctionBegin; 1872 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1873 PetscValidPointer(indices,2); 1874 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1875 if (f) { 1876 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1877 } else { 1878 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1879 } 1880 PetscFunctionReturn(0); 1881 } 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1885 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1886 { 1887 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1888 PetscErrorCode ierr; 1889 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1890 PetscReal atmp; 1891 PetscScalar *x,zero = 0.0; 1892 MatScalar *aa; 1893 PetscInt ncols,brow,krow,kcol; 1894 1895 PetscFunctionBegin; 1896 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1897 bs = A->rmap.bs; 1898 aa = a->a; 1899 ai = a->i; 1900 aj = a->j; 1901 mbs = a->mbs; 1902 1903 ierr = VecSet(v,zero);CHKERRQ(ierr); 1904 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1905 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1906 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1907 for (i=0; i<mbs; i++) { 1908 ncols = ai[1] - ai[0]; ai++; 1909 brow = bs*i; 1910 for (j=0; j<ncols; j++){ 1911 /* bcol = bs*(*aj); */ 1912 for (kcol=0; kcol<bs; kcol++){ 1913 for (krow=0; krow<bs; krow++){ 1914 atmp = PetscAbsScalar(*aa); aa++; 1915 row = brow + krow; /* row index */ 1916 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1917 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1918 } 1919 } 1920 aj++; 1921 } 1922 } 1923 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatCopy_SeqBAIJ" 1929 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1930 { 1931 PetscErrorCode ierr; 1932 1933 PetscFunctionBegin; 1934 /* If the two matrices have the same copy implementation, use fast copy. */ 1935 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1936 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1937 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1938 1939 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1940 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1941 } 1942 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1943 } else { 1944 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1951 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1952 { 1953 PetscErrorCode ierr; 1954 1955 PetscFunctionBegin; 1956 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1962 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1963 { 1964 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1965 PetscFunctionBegin; 1966 *array = a->a; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1972 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1973 { 1974 PetscFunctionBegin; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #include "petscblaslapack.h" 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1981 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1982 { 1983 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1984 PetscErrorCode ierr; 1985 PetscInt i,bs=Y->rmap.bs,j,bs2; 1986 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1987 1988 PetscFunctionBegin; 1989 if (str == SAME_NONZERO_PATTERN) { 1990 PetscScalar alpha = a; 1991 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1992 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1993 if (y->xtoy && y->XtoY != X) { 1994 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1995 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1996 } 1997 if (!y->xtoy) { /* get xtoy */ 1998 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1999 y->XtoY = X; 2000 } 2001 bs2 = bs*bs; 2002 for (i=0; i<x->nz; i++) { 2003 j = 0; 2004 while (j < bs2){ 2005 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2006 j++; 2007 } 2008 } 2009 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2010 } else { 2011 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2018 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2019 { 2020 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2021 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2022 PetscScalar *aa = a->a; 2023 2024 PetscFunctionBegin; 2025 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2031 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2032 { 2033 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2034 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2035 PetscScalar *aa = a->a; 2036 2037 PetscFunctionBegin; 2038 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 2043 /* -------------------------------------------------------------------*/ 2044 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2045 MatGetRow_SeqBAIJ, 2046 MatRestoreRow_SeqBAIJ, 2047 MatMult_SeqBAIJ_N, 2048 /* 4*/ MatMultAdd_SeqBAIJ_N, 2049 MatMultTranspose_SeqBAIJ, 2050 MatMultTransposeAdd_SeqBAIJ, 2051 MatSolve_SeqBAIJ_N, 2052 0, 2053 0, 2054 /*10*/ 0, 2055 MatLUFactor_SeqBAIJ, 2056 0, 2057 0, 2058 MatTranspose_SeqBAIJ, 2059 /*15*/ MatGetInfo_SeqBAIJ, 2060 MatEqual_SeqBAIJ, 2061 MatGetDiagonal_SeqBAIJ, 2062 MatDiagonalScale_SeqBAIJ, 2063 MatNorm_SeqBAIJ, 2064 /*20*/ 0, 2065 MatAssemblyEnd_SeqBAIJ, 2066 0, 2067 MatSetOption_SeqBAIJ, 2068 MatZeroEntries_SeqBAIJ, 2069 /*25*/ MatZeroRows_SeqBAIJ, 2070 MatLUFactorSymbolic_SeqBAIJ, 2071 MatLUFactorNumeric_SeqBAIJ_N, 2072 MatCholeskyFactorSymbolic_SeqBAIJ, 2073 MatCholeskyFactorNumeric_SeqBAIJ_N, 2074 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2075 MatILUFactorSymbolic_SeqBAIJ, 2076 MatICCFactorSymbolic_SeqBAIJ, 2077 MatGetArray_SeqBAIJ, 2078 MatRestoreArray_SeqBAIJ, 2079 /*35*/ MatDuplicate_SeqBAIJ, 2080 0, 2081 0, 2082 MatILUFactor_SeqBAIJ, 2083 0, 2084 /*40*/ MatAXPY_SeqBAIJ, 2085 MatGetSubMatrices_SeqBAIJ, 2086 MatIncreaseOverlap_SeqBAIJ, 2087 MatGetValues_SeqBAIJ, 2088 MatCopy_SeqBAIJ, 2089 /*45*/ MatPrintHelp_SeqBAIJ, 2090 MatScale_SeqBAIJ, 2091 0, 2092 0, 2093 0, 2094 /*50*/ 0, 2095 MatGetRowIJ_SeqBAIJ, 2096 MatRestoreRowIJ_SeqBAIJ, 2097 0, 2098 0, 2099 /*55*/ 0, 2100 0, 2101 0, 2102 0, 2103 MatSetValuesBlocked_SeqBAIJ, 2104 /*60*/ MatGetSubMatrix_SeqBAIJ, 2105 MatDestroy_SeqBAIJ, 2106 MatView_SeqBAIJ, 2107 0, 2108 0, 2109 /*65*/ 0, 2110 0, 2111 0, 2112 0, 2113 0, 2114 /*70*/ MatGetRowMax_SeqBAIJ, 2115 MatConvert_Basic, 2116 0, 2117 0, 2118 0, 2119 /*75*/ 0, 2120 0, 2121 0, 2122 0, 2123 0, 2124 /*80*/ 0, 2125 0, 2126 0, 2127 0, 2128 MatLoad_SeqBAIJ, 2129 /*85*/ 0, 2130 0, 2131 0, 2132 0, 2133 0, 2134 /*90*/ 0, 2135 0, 2136 0, 2137 0, 2138 0, 2139 /*95*/ 0, 2140 0, 2141 0, 2142 0, 2143 0, 2144 /*100*/0, 2145 0, 2146 0, 2147 0, 2148 0, 2149 /*105*/0, 2150 MatRealPart_SeqBAIJ, 2151 MatImaginaryPart_SeqBAIJ 2152 }; 2153 2154 EXTERN_C_BEGIN 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2157 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2158 { 2159 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2160 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2161 PetscErrorCode ierr; 2162 2163 PetscFunctionBegin; 2164 if (aij->nonew != 1) { 2165 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2166 } 2167 2168 /* allocate space for values if not already there */ 2169 if (!aij->saved_values) { 2170 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2171 } 2172 2173 /* copy values over */ 2174 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 EXTERN_C_END 2178 2179 EXTERN_C_BEGIN 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2182 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2183 { 2184 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2185 PetscErrorCode ierr; 2186 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2187 2188 PetscFunctionBegin; 2189 if (aij->nonew != 1) { 2190 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2191 } 2192 if (!aij->saved_values) { 2193 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2194 } 2195 2196 /* copy values over */ 2197 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2198 PetscFunctionReturn(0); 2199 } 2200 EXTERN_C_END 2201 2202 EXTERN_C_BEGIN 2203 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2204 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2205 EXTERN_C_END 2206 2207 EXTERN_C_BEGIN 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2210 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2211 { 2212 Mat_SeqBAIJ *b; 2213 PetscErrorCode ierr; 2214 PetscInt i,mbs,nbs,bs2,newbs = bs; 2215 PetscTruth flg,skipallocation = PETSC_FALSE; 2216 2217 PetscFunctionBegin; 2218 2219 if (nz == MAT_SKIP_ALLOCATION) { 2220 skipallocation = PETSC_TRUE; 2221 nz = 0; 2222 } 2223 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2224 if (nnz && newbs != bs) { 2225 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2226 } 2227 bs = newbs; 2228 2229 B->rmap.bs = B->cmap.bs = bs; 2230 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2231 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2232 2233 B->preallocated = PETSC_TRUE; 2234 2235 mbs = B->rmap.n/bs; 2236 nbs = B->cmap.n/bs; 2237 bs2 = bs*bs; 2238 2239 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2240 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2241 } 2242 2243 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2244 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2245 if (nnz) { 2246 for (i=0; i<mbs; i++) { 2247 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2248 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2249 } 2250 } 2251 2252 b = (Mat_SeqBAIJ*)B->data; 2253 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2254 B->ops->solve = MatSolve_SeqBAIJ_Update; 2255 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2256 if (!flg) { 2257 switch (bs) { 2258 case 1: 2259 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2260 B->ops->mult = MatMult_SeqBAIJ_1; 2261 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2262 break; 2263 case 2: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2265 B->ops->mult = MatMult_SeqBAIJ_2; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2267 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2268 break; 2269 case 3: 2270 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2271 B->ops->mult = MatMult_SeqBAIJ_3; 2272 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2273 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2274 break; 2275 case 4: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2277 B->ops->mult = MatMult_SeqBAIJ_4; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2279 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2280 break; 2281 case 5: 2282 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2283 B->ops->mult = MatMult_SeqBAIJ_5; 2284 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2285 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2286 break; 2287 case 6: 2288 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2289 B->ops->mult = MatMult_SeqBAIJ_6; 2290 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2291 break; 2292 case 7: 2293 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2294 B->ops->mult = MatMult_SeqBAIJ_7; 2295 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2296 break; 2297 default: 2298 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2299 B->ops->mult = MatMult_SeqBAIJ_N; 2300 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2301 break; 2302 } 2303 } 2304 B->rmap.bs = bs; 2305 b->mbs = mbs; 2306 b->nbs = nbs; 2307 if (!skipallocation) { 2308 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2309 /* b->ilen will count nonzeros in each block row so far. */ 2310 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2311 if (!nnz) { 2312 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2313 else if (nz <= 0) nz = 1; 2314 for (i=0; i<mbs; i++) b->imax[i] = nz; 2315 nz = nz*mbs; 2316 } else { 2317 nz = 0; 2318 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2319 } 2320 2321 /* allocate the matrix space */ 2322 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2323 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2324 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2325 b->singlemalloc = PETSC_TRUE; 2326 2327 b->i[0] = 0; 2328 for (i=1; i<mbs+1; i++) { 2329 b->i[i] = b->i[i-1] + b->imax[i-1]; 2330 } 2331 } 2332 2333 B->rmap.bs = bs; 2334 b->bs2 = bs2; 2335 b->mbs = mbs; 2336 b->nz = 0; 2337 b->maxnz = nz*bs2; 2338 B->info.nz_unneeded = (PetscReal)b->maxnz; 2339 PetscFunctionReturn(0); 2340 } 2341 EXTERN_C_END 2342 2343 /*MC 2344 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2345 block sparse compressed row format. 2346 2347 Options Database Keys: 2348 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2349 2350 Level: beginner 2351 2352 .seealso: MatCreateSeqBAIJ() 2353 M*/ 2354 2355 EXTERN_C_BEGIN 2356 #undef __FUNCT__ 2357 #define __FUNCT__ "MatCreate_SeqBAIJ" 2358 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2359 { 2360 PetscErrorCode ierr; 2361 PetscMPIInt size; 2362 Mat_SeqBAIJ *b; 2363 2364 PetscFunctionBegin; 2365 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2366 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2367 2368 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2369 B->data = (void*)b; 2370 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2371 B->factor = 0; 2372 B->mapping = 0; 2373 b->row = 0; 2374 b->col = 0; 2375 b->icol = 0; 2376 b->reallocs = 0; 2377 b->saved_values = 0; 2378 #if defined(PETSC_USE_MAT_SINGLE) 2379 b->setvalueslen = 0; 2380 b->setvaluescopy = PETSC_NULL; 2381 #endif 2382 2383 b->sorted = PETSC_FALSE; 2384 b->roworiented = PETSC_TRUE; 2385 b->nonew = 0; 2386 b->diag = 0; 2387 b->solve_work = 0; 2388 b->mult_work = 0; 2389 B->spptr = 0; 2390 B->info.nz_unneeded = (PetscReal)b->maxnz; 2391 b->keepzeroedrows = PETSC_FALSE; 2392 b->xtoy = 0; 2393 b->XtoY = 0; 2394 b->compressedrow.use = PETSC_FALSE; 2395 b->compressedrow.nrows = 0; 2396 b->compressedrow.i = PETSC_NULL; 2397 b->compressedrow.rindex = PETSC_NULL; 2398 b->compressedrow.checked = PETSC_FALSE; 2399 B->same_nonzero = PETSC_FALSE; 2400 2401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2402 "MatInvertBlockDiagonal_SeqBAIJ", 2403 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2404 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2405 "MatStoreValues_SeqBAIJ", 2406 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2407 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2408 "MatRetrieveValues_SeqBAIJ", 2409 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2410 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2411 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2412 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2414 "MatConvert_SeqBAIJ_SeqAIJ", 2415 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2416 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2417 "MatConvert_SeqBAIJ_SeqSBAIJ", 2418 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2419 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2420 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2421 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 EXTERN_C_END 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2428 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2429 { 2430 Mat C; 2431 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2432 PetscErrorCode ierr; 2433 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2434 2435 PetscFunctionBegin; 2436 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2437 2438 *B = 0; 2439 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2440 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2441 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2442 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2443 c = (Mat_SeqBAIJ*)C->data; 2444 2445 C->rmap.N = A->rmap.N; 2446 C->cmap.N = A->cmap.N; 2447 C->rmap.bs = A->rmap.bs; 2448 c->bs2 = a->bs2; 2449 c->mbs = a->mbs; 2450 c->nbs = a->nbs; 2451 2452 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2453 for (i=0; i<mbs; i++) { 2454 c->imax[i] = a->imax[i]; 2455 c->ilen[i] = a->ilen[i]; 2456 } 2457 2458 /* allocate the matrix space */ 2459 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2460 c->singlemalloc = PETSC_TRUE; 2461 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2462 if (mbs > 0) { 2463 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2464 if (cpvalues == MAT_COPY_VALUES) { 2465 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2466 } else { 2467 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2468 } 2469 } 2470 c->sorted = a->sorted; 2471 c->roworiented = a->roworiented; 2472 c->nonew = a->nonew; 2473 2474 if (a->diag) { 2475 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2476 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2477 for (i=0; i<mbs; i++) { 2478 c->diag[i] = a->diag[i]; 2479 } 2480 } else c->diag = 0; 2481 c->nz = a->nz; 2482 c->maxnz = a->maxnz; 2483 c->solve_work = 0; 2484 c->mult_work = 0; 2485 C->preallocated = PETSC_TRUE; 2486 C->assembled = PETSC_TRUE; 2487 2488 c->compressedrow.use = a->compressedrow.use; 2489 c->compressedrow.nrows = a->compressedrow.nrows; 2490 c->compressedrow.checked = a->compressedrow.checked; 2491 if ( a->compressedrow.checked && a->compressedrow.use){ 2492 i = a->compressedrow.nrows; 2493 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2494 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2495 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2496 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2497 } else { 2498 c->compressedrow.use = PETSC_FALSE; 2499 c->compressedrow.i = PETSC_NULL; 2500 c->compressedrow.rindex = PETSC_NULL; 2501 } 2502 C->same_nonzero = A->same_nonzero; 2503 *B = C; 2504 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 #undef __FUNCT__ 2509 #define __FUNCT__ "MatLoad_SeqBAIJ" 2510 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2511 { 2512 Mat_SeqBAIJ *a; 2513 Mat B; 2514 PetscErrorCode ierr; 2515 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2516 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2517 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2518 PetscInt *masked,nmask,tmp,bs2,ishift; 2519 PetscMPIInt size; 2520 int fd; 2521 PetscScalar *aa; 2522 MPI_Comm comm = ((PetscObject)viewer)->comm; 2523 2524 PetscFunctionBegin; 2525 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2526 bs2 = bs*bs; 2527 2528 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2529 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2530 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2531 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2532 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2533 M = header[1]; N = header[2]; nz = header[3]; 2534 2535 if (header[3] < 0) { 2536 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2537 } 2538 2539 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2540 2541 /* 2542 This code adds extra rows to make sure the number of rows is 2543 divisible by the blocksize 2544 */ 2545 mbs = M/bs; 2546 extra_rows = bs - M + bs*(mbs); 2547 if (extra_rows == bs) extra_rows = 0; 2548 else mbs++; 2549 if (extra_rows) { 2550 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2551 } 2552 2553 /* read in row lengths */ 2554 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2555 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2556 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2557 2558 /* read in column indices */ 2559 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2560 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2561 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2562 2563 /* loop over row lengths determining block row lengths */ 2564 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2565 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2566 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2567 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2568 masked = mask + mbs; 2569 rowcount = 0; nzcount = 0; 2570 for (i=0; i<mbs; i++) { 2571 nmask = 0; 2572 for (j=0; j<bs; j++) { 2573 kmax = rowlengths[rowcount]; 2574 for (k=0; k<kmax; k++) { 2575 tmp = jj[nzcount++]/bs; 2576 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2577 } 2578 rowcount++; 2579 } 2580 browlengths[i] += nmask; 2581 /* zero out the mask elements we set */ 2582 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2583 } 2584 2585 /* create our matrix */ 2586 ierr = MatCreate(comm,&B); 2587 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2588 ierr = MatSetType(B,type);CHKERRQ(ierr); 2589 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2590 a = (Mat_SeqBAIJ*)B->data; 2591 2592 /* set matrix "i" values */ 2593 a->i[0] = 0; 2594 for (i=1; i<= mbs; i++) { 2595 a->i[i] = a->i[i-1] + browlengths[i-1]; 2596 a->ilen[i-1] = browlengths[i-1]; 2597 } 2598 a->nz = 0; 2599 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2600 2601 /* read in nonzero values */ 2602 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2603 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2604 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2605 2606 /* set "a" and "j" values into matrix */ 2607 nzcount = 0; jcount = 0; 2608 for (i=0; i<mbs; i++) { 2609 nzcountb = nzcount; 2610 nmask = 0; 2611 for (j=0; j<bs; j++) { 2612 kmax = rowlengths[i*bs+j]; 2613 for (k=0; k<kmax; k++) { 2614 tmp = jj[nzcount++]/bs; 2615 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2616 } 2617 } 2618 /* sort the masked values */ 2619 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2620 2621 /* set "j" values into matrix */ 2622 maskcount = 1; 2623 for (j=0; j<nmask; j++) { 2624 a->j[jcount++] = masked[j]; 2625 mask[masked[j]] = maskcount++; 2626 } 2627 /* set "a" values into matrix */ 2628 ishift = bs2*a->i[i]; 2629 for (j=0; j<bs; j++) { 2630 kmax = rowlengths[i*bs+j]; 2631 for (k=0; k<kmax; k++) { 2632 tmp = jj[nzcountb]/bs ; 2633 block = mask[tmp] - 1; 2634 point = jj[nzcountb] - bs*tmp; 2635 idx = ishift + bs2*block + j + bs*point; 2636 a->a[idx] = (MatScalar)aa[nzcountb++]; 2637 } 2638 } 2639 /* zero out the mask elements we set */ 2640 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2641 } 2642 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2643 2644 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2645 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2646 ierr = PetscFree(aa);CHKERRQ(ierr); 2647 ierr = PetscFree(jj);CHKERRQ(ierr); 2648 ierr = PetscFree(mask);CHKERRQ(ierr); 2649 2650 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2652 ierr = MatView_Private(B);CHKERRQ(ierr); 2653 2654 *A = B; 2655 PetscFunctionReturn(0); 2656 } 2657 2658 #undef __FUNCT__ 2659 #define __FUNCT__ "MatCreateSeqBAIJ" 2660 /*@C 2661 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2662 compressed row) format. For good matrix assembly performance the 2663 user should preallocate the matrix storage by setting the parameter nz 2664 (or the array nnz). By setting these parameters accurately, performance 2665 during matrix assembly can be increased by more than a factor of 50. 2666 2667 Collective on MPI_Comm 2668 2669 Input Parameters: 2670 + comm - MPI communicator, set to PETSC_COMM_SELF 2671 . bs - size of block 2672 . m - number of rows 2673 . n - number of columns 2674 . nz - number of nonzero blocks per block row (same for all rows) 2675 - nnz - array containing the number of nonzero blocks in the various block rows 2676 (possibly different for each block row) or PETSC_NULL 2677 2678 Output Parameter: 2679 . A - the matrix 2680 2681 Options Database Keys: 2682 . -mat_no_unroll - uses code that does not unroll the loops in the 2683 block calculations (much slower) 2684 . -mat_block_size - size of the blocks to use 2685 2686 Level: intermediate 2687 2688 Notes: 2689 The number of rows and columns must be divisible by blocksize. 2690 2691 If the nnz parameter is given then the nz parameter is ignored 2692 2693 A nonzero block is any block that as 1 or more nonzeros in it 2694 2695 The block AIJ format is fully compatible with standard Fortran 77 2696 storage. That is, the stored row and column indices can begin at 2697 either one (as in Fortran) or zero. See the users' manual for details. 2698 2699 Specify the preallocated storage with either nz or nnz (not both). 2700 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2701 allocation. For additional details, see the users manual chapter on 2702 matrices. 2703 2704 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2705 @*/ 2706 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2707 { 2708 PetscErrorCode ierr; 2709 2710 PetscFunctionBegin; 2711 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2712 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2713 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2714 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2720 /*@C 2721 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2722 per row in the matrix. For good matrix assembly performance the 2723 user should preallocate the matrix storage by setting the parameter nz 2724 (or the array nnz). By setting these parameters accurately, performance 2725 during matrix assembly can be increased by more than a factor of 50. 2726 2727 Collective on MPI_Comm 2728 2729 Input Parameters: 2730 + A - the matrix 2731 . bs - size of block 2732 . nz - number of block nonzeros per block row (same for all rows) 2733 - nnz - array containing the number of block nonzeros in the various block rows 2734 (possibly different for each block row) or PETSC_NULL 2735 2736 Options Database Keys: 2737 . -mat_no_unroll - uses code that does not unroll the loops in the 2738 block calculations (much slower) 2739 . -mat_block_size - size of the blocks to use 2740 2741 Level: intermediate 2742 2743 Notes: 2744 If the nnz parameter is given then the nz parameter is ignored 2745 2746 The block AIJ format is fully compatible with standard Fortran 77 2747 storage. That is, the stored row and column indices can begin at 2748 either one (as in Fortran) or zero. See the users' manual for details. 2749 2750 Specify the preallocated storage with either nz or nnz (not both). 2751 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2752 allocation. For additional details, see the users manual chapter on 2753 matrices. 2754 2755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2756 @*/ 2757 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2758 { 2759 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2760 2761 PetscFunctionBegin; 2762 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2763 if (f) { 2764 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2765 } 2766 PetscFunctionReturn(0); 2767 } 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2771 /*@ 2772 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2773 (upper triangular entries in CSR format) provided by the user. 2774 2775 Collective on MPI_Comm 2776 2777 Input Parameters: 2778 + comm - must be an MPI communicator of size 1 2779 . bs - size of block 2780 . m - number of rows 2781 . n - number of columns 2782 . i - row indices 2783 . j - column indices 2784 - a - matrix values 2785 2786 Output Parameter: 2787 . mat - the matrix 2788 2789 Level: intermediate 2790 2791 Notes: 2792 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2793 once the matrix is destroyed 2794 2795 You cannot set new nonzero locations into this matrix, that will generate an error. 2796 2797 The i and j indices are 0 based 2798 2799 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2800 2801 @*/ 2802 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2803 { 2804 PetscErrorCode ierr; 2805 PetscInt ii; 2806 Mat_SeqBAIJ *baij; 2807 2808 PetscFunctionBegin; 2809 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2810 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2811 2812 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2813 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2814 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2815 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2816 baij = (Mat_SeqBAIJ*)(*mat)->data; 2817 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2818 2819 baij->i = i; 2820 baij->j = j; 2821 baij->a = a; 2822 baij->singlemalloc = PETSC_FALSE; 2823 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2824 baij->freedata = PETSC_FALSE; 2825 2826 for (ii=0; ii<m; ii++) { 2827 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2828 #if defined(PETSC_USE_DEBUG) 2829 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2830 #endif 2831 } 2832 #if defined(PETSC_USE_DEBUG) 2833 for (ii=0; ii<baij->i[m]; ii++) { 2834 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2835 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2836 } 2837 #endif 2838 2839 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843