1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 785 PetscInt *tia, *tja; 786 787 PetscFunctionBegin; 788 *nn = n; 789 if (!ia) PetscFunctionReturn(0); 790 if (symmetric) { 791 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 792 } else { 793 tia = a->i; tja = a->j; 794 } 795 796 if (!blockcompressed) { 797 /* malloc & create the natural set of indices */ 798 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 799 for (i=0; i<n+1; i++) { 800 for (j=0; j<bs; j++) { 801 *ia[i*bs+j] = tia[i]*bs+j; 802 } 803 } 804 for (i=0; i<nz; i++) { 805 for (j=0; j<bs; j++) { 806 *ja[i*bs+j] = tia[i]*bs+j; 807 } 808 } 809 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 810 ierr = PetscFree(tia);CHKERRQ(ierr); 811 ierr = PetscFree(tja);CHKERRQ(ierr); 812 } 813 } else { 814 *ia = tia; 815 *ja = tja; 816 } 817 if (oshift == 1) { 818 for (i=0; i<nz; i++) (*ja)[i]++; 819 for (i=0; i<n+1; i++) (*ia)[i]++; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 826 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 829 PetscErrorCode ierr; 830 PetscInt i,n = a->mbs,nz = a->i[n]; 831 832 PetscFunctionBegin; 833 if (!ia) PetscFunctionReturn(0); 834 if (!blockcompressed) { 835 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 836 }else if (symmetric) { 837 ierr = PetscFree(*ia);CHKERRQ(ierr); 838 ierr = PetscFree(*ja);CHKERRQ(ierr); 839 } else if (oshift == 1) { /* blockcompressed */ 840 for (i=0; i<nz; i++) a->j[i]--; 841 for (i=0; i<n+1; i++) a->i[i]--; 842 } 843 PetscFunctionReturn(0); 844 } 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatDestroy_SeqBAIJ" 848 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 849 { 850 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 851 PetscErrorCode ierr; 852 853 PetscFunctionBegin; 854 #if defined(PETSC_USE_LOG) 855 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 856 #endif 857 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 858 if (a->row) { 859 ierr = ISDestroy(a->row);CHKERRQ(ierr); 860 } 861 if (a->col) { 862 ierr = ISDestroy(a->col);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(a->diag);CHKERRQ(ierr); 865 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 866 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 867 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 868 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 869 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 870 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 871 #if defined(PETSC_USE_MAT_SINGLE) 872 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 873 #endif 874 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 875 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 876 877 ierr = PetscFree(a);CHKERRQ(ierr); 878 879 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatSetOption_SeqBAIJ" 892 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 893 { 894 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 switch (op) { 899 case MAT_ROW_ORIENTED: 900 a->roworiented = PETSC_TRUE; 901 break; 902 case MAT_COLUMN_ORIENTED: 903 a->roworiented = PETSC_FALSE; 904 break; 905 case MAT_COLUMNS_SORTED: 906 a->sorted = PETSC_TRUE; 907 break; 908 case MAT_COLUMNS_UNSORTED: 909 a->sorted = PETSC_FALSE; 910 break; 911 case MAT_KEEP_ZEROED_ROWS: 912 a->keepzeroedrows = PETSC_TRUE; 913 break; 914 case MAT_NO_NEW_NONZERO_LOCATIONS: 915 a->nonew = 1; 916 break; 917 case MAT_NEW_NONZERO_LOCATION_ERR: 918 a->nonew = -1; 919 break; 920 case MAT_NEW_NONZERO_ALLOCATION_ERR: 921 a->nonew = -2; 922 break; 923 case MAT_YES_NEW_NONZERO_LOCATIONS: 924 a->nonew = 0; 925 break; 926 case MAT_ROWS_SORTED: 927 case MAT_ROWS_UNSORTED: 928 case MAT_YES_NEW_DIAGONALS: 929 case MAT_IGNORE_OFF_PROC_ENTRIES: 930 case MAT_USE_HASH_TABLE: 931 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 932 break; 933 case MAT_NO_NEW_DIAGONALS: 934 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 935 case MAT_SYMMETRIC: 936 case MAT_STRUCTURALLY_SYMMETRIC: 937 case MAT_NOT_SYMMETRIC: 938 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 939 case MAT_HERMITIAN: 940 case MAT_NOT_HERMITIAN: 941 case MAT_SYMMETRY_ETERNAL: 942 case MAT_NOT_SYMMETRY_ETERNAL: 943 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 944 break; 945 default: 946 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatGetRow_SeqBAIJ" 953 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 954 { 955 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 958 MatScalar *aa,*aa_i; 959 PetscScalar *v_i; 960 961 PetscFunctionBegin; 962 bs = A->rmap.bs; 963 ai = a->i; 964 aj = a->j; 965 aa = a->a; 966 bs2 = a->bs2; 967 968 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 969 970 bn = row/bs; /* Block number */ 971 bp = row % bs; /* Block Position */ 972 M = ai[bn+1] - ai[bn]; 973 *nz = bs*M; 974 975 if (v) { 976 *v = 0; 977 if (*nz) { 978 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 979 for (i=0; i<M; i++) { /* for each block in the block row */ 980 v_i = *v + i*bs; 981 aa_i = aa + bs2*(ai[bn] + i); 982 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 983 } 984 } 985 } 986 987 if (idx) { 988 *idx = 0; 989 if (*nz) { 990 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 991 for (i=0; i<M; i++) { /* for each block in the block row */ 992 idx_i = *idx + i*bs; 993 itmp = bs*aj[ai[bn] + i]; 994 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 995 } 996 } 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1003 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1004 { 1005 PetscErrorCode ierr; 1006 1007 PetscFunctionBegin; 1008 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1009 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1015 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1016 { 1017 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1018 Mat C; 1019 PetscErrorCode ierr; 1020 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1021 PetscInt *rows,*cols,bs2=a->bs2; 1022 PetscScalar *array; 1023 1024 PetscFunctionBegin; 1025 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1026 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1027 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1028 1029 #if defined(PETSC_USE_MAT_SINGLE) 1030 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1031 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1032 #else 1033 array = a->a; 1034 #endif 1035 1036 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1037 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1038 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1039 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1040 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1041 ierr = PetscFree(col);CHKERRQ(ierr); 1042 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1043 cols = rows + bs; 1044 for (i=0; i<mbs; i++) { 1045 cols[0] = i*bs; 1046 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1047 len = ai[i+1] - ai[i]; 1048 for (j=0; j<len; j++) { 1049 rows[0] = (*aj++)*bs; 1050 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1051 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1052 array += bs2; 1053 } 1054 } 1055 ierr = PetscFree(rows);CHKERRQ(ierr); 1056 #if defined(PETSC_USE_MAT_SINGLE) 1057 ierr = PetscFree(array); 1058 #endif 1059 1060 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1062 1063 if (B) { 1064 *B = C; 1065 } else { 1066 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1073 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1074 { 1075 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1076 PetscErrorCode ierr; 1077 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1078 int fd; 1079 PetscScalar *aa; 1080 FILE *file; 1081 1082 PetscFunctionBegin; 1083 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1084 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1085 col_lens[0] = MAT_FILE_COOKIE; 1086 1087 col_lens[1] = A->rmap.N; 1088 col_lens[2] = A->cmap.n; 1089 col_lens[3] = a->nz*bs2; 1090 1091 /* store lengths of each row and write (including header) to file */ 1092 count = 0; 1093 for (i=0; i<a->mbs; i++) { 1094 for (j=0; j<bs; j++) { 1095 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1096 } 1097 } 1098 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1099 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1100 1101 /* store column indices (zero start index) */ 1102 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1103 count = 0; 1104 for (i=0; i<a->mbs; i++) { 1105 for (j=0; j<bs; j++) { 1106 for (k=a->i[i]; k<a->i[i+1]; k++) { 1107 for (l=0; l<bs; l++) { 1108 jj[count++] = bs*a->j[k] + l; 1109 } 1110 } 1111 } 1112 } 1113 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1114 ierr = PetscFree(jj);CHKERRQ(ierr); 1115 1116 /* store nonzero values */ 1117 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1118 count = 0; 1119 for (i=0; i<a->mbs; i++) { 1120 for (j=0; j<bs; j++) { 1121 for (k=a->i[i]; k<a->i[i+1]; k++) { 1122 for (l=0; l<bs; l++) { 1123 aa[count++] = a->a[bs2*k + l*bs + j]; 1124 } 1125 } 1126 } 1127 } 1128 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1129 ierr = PetscFree(aa);CHKERRQ(ierr); 1130 1131 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1132 if (file) { 1133 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1140 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1141 { 1142 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1143 PetscErrorCode ierr; 1144 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1145 PetscViewerFormat format; 1146 1147 PetscFunctionBegin; 1148 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1149 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1150 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1151 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1152 Mat aij; 1153 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1154 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1155 ierr = MatDestroy(aij);CHKERRQ(ierr); 1156 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1157 PetscFunctionReturn(0); 1158 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1159 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1160 for (i=0; i<a->mbs; i++) { 1161 for (j=0; j<bs; j++) { 1162 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1163 for (k=a->i[i]; k<a->i[i+1]; k++) { 1164 for (l=0; l<bs; l++) { 1165 #if defined(PETSC_USE_COMPLEX) 1166 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1167 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1168 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1169 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1170 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1171 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1172 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1173 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1174 } 1175 #else 1176 if (a->a[bs2*k + l*bs + j] != 0.0) { 1177 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1178 } 1179 #endif 1180 } 1181 } 1182 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1183 } 1184 } 1185 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1186 } else { 1187 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1188 for (i=0; i<a->mbs; i++) { 1189 for (j=0; j<bs; j++) { 1190 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1191 for (k=a->i[i]; k<a->i[i+1]; k++) { 1192 for (l=0; l<bs; l++) { 1193 #if defined(PETSC_USE_COMPLEX) 1194 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1195 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1196 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1197 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1198 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1199 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1200 } else { 1201 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1202 } 1203 #else 1204 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1205 #endif 1206 } 1207 } 1208 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1209 } 1210 } 1211 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1212 } 1213 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1219 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1220 { 1221 Mat A = (Mat) Aa; 1222 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1225 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1226 MatScalar *aa; 1227 PetscViewer viewer; 1228 1229 PetscFunctionBegin; 1230 1231 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1232 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1233 1234 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1235 1236 /* loop over matrix elements drawing boxes */ 1237 color = PETSC_DRAW_BLUE; 1238 for (i=0,row=0; i<mbs; i++,row+=bs) { 1239 for (j=a->i[i]; j<a->i[i+1]; j++) { 1240 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1241 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1242 aa = a->a + j*bs2; 1243 for (k=0; k<bs; k++) { 1244 for (l=0; l<bs; l++) { 1245 if (PetscRealPart(*aa++) >= 0.) continue; 1246 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1247 } 1248 } 1249 } 1250 } 1251 color = PETSC_DRAW_CYAN; 1252 for (i=0,row=0; i<mbs; i++,row+=bs) { 1253 for (j=a->i[i]; j<a->i[i+1]; j++) { 1254 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1255 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1256 aa = a->a + j*bs2; 1257 for (k=0; k<bs; k++) { 1258 for (l=0; l<bs; l++) { 1259 if (PetscRealPart(*aa++) != 0.) continue; 1260 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1261 } 1262 } 1263 } 1264 } 1265 1266 color = PETSC_DRAW_RED; 1267 for (i=0,row=0; i<mbs; i++,row+=bs) { 1268 for (j=a->i[i]; j<a->i[i+1]; j++) { 1269 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1270 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1271 aa = a->a + j*bs2; 1272 for (k=0; k<bs; k++) { 1273 for (l=0; l<bs; l++) { 1274 if (PetscRealPart(*aa++) <= 0.) continue; 1275 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1276 } 1277 } 1278 } 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1285 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1286 { 1287 PetscErrorCode ierr; 1288 PetscReal xl,yl,xr,yr,w,h; 1289 PetscDraw draw; 1290 PetscTruth isnull; 1291 1292 PetscFunctionBegin; 1293 1294 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1295 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1296 1297 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1298 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1299 xr += w; yr += h; xl = -w; yl = -h; 1300 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1301 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1302 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatView_SeqBAIJ" 1308 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1309 { 1310 PetscErrorCode ierr; 1311 PetscTruth iascii,isbinary,isdraw; 1312 1313 PetscFunctionBegin; 1314 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1315 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1316 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1317 if (iascii){ 1318 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1319 } else if (isbinary) { 1320 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1321 } else if (isdraw) { 1322 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1323 } else { 1324 Mat B; 1325 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1326 ierr = MatView(B,viewer);CHKERRQ(ierr); 1327 ierr = MatDestroy(B);CHKERRQ(ierr); 1328 } 1329 PetscFunctionReturn(0); 1330 } 1331 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1335 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1336 { 1337 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1338 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1339 PetscInt *ai = a->i,*ailen = a->ilen; 1340 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1341 MatScalar *ap,*aa = a->a,zero = 0.0; 1342 1343 PetscFunctionBegin; 1344 for (k=0; k<m; k++) { /* loop over rows */ 1345 row = im[k]; brow = row/bs; 1346 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1347 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1348 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1349 nrow = ailen[brow]; 1350 for (l=0; l<n; l++) { /* loop over columns */ 1351 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1352 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1353 col = in[l] ; 1354 bcol = col/bs; 1355 cidx = col%bs; 1356 ridx = row%bs; 1357 high = nrow; 1358 low = 0; /* assume unsorted */ 1359 while (high-low > 5) { 1360 t = (low+high)/2; 1361 if (rp[t] > bcol) high = t; 1362 else low = t; 1363 } 1364 for (i=low; i<high; i++) { 1365 if (rp[i] > bcol) break; 1366 if (rp[i] == bcol) { 1367 *v++ = ap[bs2*i+bs*cidx+ridx]; 1368 goto finished; 1369 } 1370 } 1371 *v++ = zero; 1372 finished:; 1373 } 1374 } 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #if defined(PETSC_USE_MAT_SINGLE) 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1381 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1382 { 1383 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1384 PetscErrorCode ierr; 1385 PetscInt i,N = m*n*b->bs2; 1386 MatScalar *vsingle; 1387 1388 PetscFunctionBegin; 1389 if (N > b->setvalueslen) { 1390 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1391 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1392 b->setvalueslen = N; 1393 } 1394 vsingle = b->setvaluescopy; 1395 for (i=0; i<N; i++) { 1396 vsingle[i] = v[i]; 1397 } 1398 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 #endif 1402 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1406 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1409 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1410 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1411 PetscErrorCode ierr; 1412 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1413 PetscTruth roworiented=a->roworiented; 1414 const MatScalar *value = v; 1415 MatScalar *ap,*aa = a->a,*bap; 1416 1417 PetscFunctionBegin; 1418 if (roworiented) { 1419 stepval = (n-1)*bs; 1420 } else { 1421 stepval = (m-1)*bs; 1422 } 1423 for (k=0; k<m; k++) { /* loop over added rows */ 1424 row = im[k]; 1425 if (row < 0) continue; 1426 #if defined(PETSC_USE_DEBUG) 1427 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1428 #endif 1429 rp = aj + ai[row]; 1430 ap = aa + bs2*ai[row]; 1431 rmax = imax[row]; 1432 nrow = ailen[row]; 1433 low = 0; 1434 high = nrow; 1435 for (l=0; l<n; l++) { /* loop over added columns */ 1436 if (in[l] < 0) continue; 1437 #if defined(PETSC_USE_DEBUG) 1438 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1439 #endif 1440 col = in[l]; 1441 if (roworiented) { 1442 value = v + k*(stepval+bs)*bs + l*bs; 1443 } else { 1444 value = v + l*(stepval+bs)*bs + k*bs; 1445 } 1446 if (col <= lastcol) low = 0; else high = nrow; 1447 lastcol = col; 1448 while (high-low > 7) { 1449 t = (low+high)/2; 1450 if (rp[t] > col) high = t; 1451 else low = t; 1452 } 1453 for (i=low; i<high; i++) { 1454 if (rp[i] > col) break; 1455 if (rp[i] == col) { 1456 bap = ap + bs2*i; 1457 if (roworiented) { 1458 if (is == ADD_VALUES) { 1459 for (ii=0; ii<bs; ii++,value+=stepval) { 1460 for (jj=ii; jj<bs2; jj+=bs) { 1461 bap[jj] += *value++; 1462 } 1463 } 1464 } else { 1465 for (ii=0; ii<bs; ii++,value+=stepval) { 1466 for (jj=ii; jj<bs2; jj+=bs) { 1467 bap[jj] = *value++; 1468 } 1469 } 1470 } 1471 } else { 1472 if (is == ADD_VALUES) { 1473 for (ii=0; ii<bs; ii++,value+=stepval) { 1474 for (jj=0; jj<bs; jj++) { 1475 *bap++ += *value++; 1476 } 1477 } 1478 } else { 1479 for (ii=0; ii<bs; ii++,value+=stepval) { 1480 for (jj=0; jj<bs; jj++) { 1481 *bap++ = *value++; 1482 } 1483 } 1484 } 1485 } 1486 goto noinsert2; 1487 } 1488 } 1489 if (nonew == 1) goto noinsert2; 1490 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1491 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1492 N = nrow++ - 1; high++; 1493 /* shift up all the later entries in this row */ 1494 for (ii=N; ii>=i; ii--) { 1495 rp[ii+1] = rp[ii]; 1496 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1497 } 1498 if (N >= i) { 1499 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1500 } 1501 rp[i] = col; 1502 bap = ap + bs2*i; 1503 if (roworiented) { 1504 for (ii=0; ii<bs; ii++,value+=stepval) { 1505 for (jj=ii; jj<bs2; jj+=bs) { 1506 bap[jj] = *value++; 1507 } 1508 } 1509 } else { 1510 for (ii=0; ii<bs; ii++,value+=stepval) { 1511 for (jj=0; jj<bs; jj++) { 1512 *bap++ = *value++; 1513 } 1514 } 1515 } 1516 noinsert2:; 1517 low = i; 1518 } 1519 ailen[row] = nrow; 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1526 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1527 { 1528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1529 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1530 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1531 PetscErrorCode ierr; 1532 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1533 MatScalar *aa = a->a,*ap; 1534 PetscReal ratio=0.6; 1535 1536 PetscFunctionBegin; 1537 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1538 1539 if (m) rmax = ailen[0]; 1540 for (i=1; i<mbs; i++) { 1541 /* move each row back by the amount of empty slots (fshift) before it*/ 1542 fshift += imax[i-1] - ailen[i-1]; 1543 rmax = PetscMax(rmax,ailen[i]); 1544 if (fshift) { 1545 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1546 N = ailen[i]; 1547 for (j=0; j<N; j++) { 1548 ip[j-fshift] = ip[j]; 1549 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1550 } 1551 } 1552 ai[i] = ai[i-1] + ailen[i-1]; 1553 } 1554 if (mbs) { 1555 fshift += imax[mbs-1] - ailen[mbs-1]; 1556 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1557 } 1558 /* reset ilen and imax for each row */ 1559 for (i=0; i<mbs; i++) { 1560 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1561 } 1562 a->nz = ai[mbs]; 1563 1564 /* diagonals may have moved, so kill the diagonal pointers */ 1565 a->idiagvalid = PETSC_FALSE; 1566 if (fshift && a->diag) { 1567 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1568 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1569 a->diag = 0; 1570 } 1571 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1572 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1573 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1574 a->reallocs = 0; 1575 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1576 1577 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1578 if (a->compressedrow.use){ 1579 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1580 } 1581 1582 A->same_nonzero = PETSC_TRUE; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 /* 1587 This function returns an array of flags which indicate the locations of contiguous 1588 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1589 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1590 Assume: sizes should be long enough to hold all the values. 1591 */ 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1594 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1595 { 1596 PetscInt i,j,k,row; 1597 PetscTruth flg; 1598 1599 PetscFunctionBegin; 1600 for (i=0,j=0; i<n; j++) { 1601 row = idx[i]; 1602 if (row%bs!=0) { /* Not the begining of a block */ 1603 sizes[j] = 1; 1604 i++; 1605 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1606 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1607 i++; 1608 } else { /* Begining of the block, so check if the complete block exists */ 1609 flg = PETSC_TRUE; 1610 for (k=1; k<bs; k++) { 1611 if (row+k != idx[i+k]) { /* break in the block */ 1612 flg = PETSC_FALSE; 1613 break; 1614 } 1615 } 1616 if (flg) { /* No break in the bs */ 1617 sizes[j] = bs; 1618 i+= bs; 1619 } else { 1620 sizes[j] = 1; 1621 i++; 1622 } 1623 } 1624 } 1625 *bs_max = j; 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1631 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1632 { 1633 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1634 PetscErrorCode ierr; 1635 PetscInt i,j,k,count,*rows; 1636 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1637 PetscScalar zero = 0.0; 1638 MatScalar *aa; 1639 1640 PetscFunctionBegin; 1641 /* Make a copy of the IS and sort it */ 1642 /* allocate memory for rows,sizes */ 1643 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1644 sizes = rows + is_n; 1645 1646 /* copy IS values to rows, and sort them */ 1647 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1648 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1649 if (baij->keepzeroedrows) { 1650 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1651 bs_max = is_n; 1652 A->same_nonzero = PETSC_TRUE; 1653 } else { 1654 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1655 A->same_nonzero = PETSC_FALSE; 1656 } 1657 1658 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1659 row = rows[j]; 1660 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1661 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1662 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1663 if (sizes[i] == bs && !baij->keepzeroedrows) { 1664 if (diag != 0.0) { 1665 if (baij->ilen[row/bs] > 0) { 1666 baij->ilen[row/bs] = 1; 1667 baij->j[baij->i[row/bs]] = row/bs; 1668 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1669 } 1670 /* Now insert all the diagonal values for this bs */ 1671 for (k=0; k<bs; k++) { 1672 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1673 } 1674 } else { /* (diag == 0.0) */ 1675 baij->ilen[row/bs] = 0; 1676 } /* end (diag == 0.0) */ 1677 } else { /* (sizes[i] != bs) */ 1678 #if defined (PETSC_USE_DEBUG) 1679 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1680 #endif 1681 for (k=0; k<count; k++) { 1682 aa[0] = zero; 1683 aa += bs; 1684 } 1685 if (diag != 0.0) { 1686 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1687 } 1688 } 1689 } 1690 1691 ierr = PetscFree(rows);CHKERRQ(ierr); 1692 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1698 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1699 { 1700 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1701 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1702 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1703 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1704 PetscErrorCode ierr; 1705 PetscInt ridx,cidx,bs2=a->bs2; 1706 PetscTruth roworiented=a->roworiented; 1707 MatScalar *ap,value,*aa=a->a,*bap; 1708 1709 PetscFunctionBegin; 1710 for (k=0; k<m; k++) { /* loop over added rows */ 1711 row = im[k]; 1712 brow = row/bs; 1713 if (row < 0) continue; 1714 #if defined(PETSC_USE_DEBUG) 1715 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1716 #endif 1717 rp = aj + ai[brow]; 1718 ap = aa + bs2*ai[brow]; 1719 rmax = imax[brow]; 1720 nrow = ailen[brow]; 1721 low = 0; 1722 high = nrow; 1723 for (l=0; l<n; l++) { /* loop over added columns */ 1724 if (in[l] < 0) continue; 1725 #if defined(PETSC_USE_DEBUG) 1726 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1727 #endif 1728 col = in[l]; bcol = col/bs; 1729 ridx = row % bs; cidx = col % bs; 1730 if (roworiented) { 1731 value = v[l + k*n]; 1732 } else { 1733 value = v[k + l*m]; 1734 } 1735 if (col <= lastcol) low = 0; else high = nrow; 1736 lastcol = col; 1737 while (high-low > 7) { 1738 t = (low+high)/2; 1739 if (rp[t] > bcol) high = t; 1740 else low = t; 1741 } 1742 for (i=low; i<high; i++) { 1743 if (rp[i] > bcol) break; 1744 if (rp[i] == bcol) { 1745 bap = ap + bs2*i + bs*cidx + ridx; 1746 if (is == ADD_VALUES) *bap += value; 1747 else *bap = value; 1748 goto noinsert1; 1749 } 1750 } 1751 if (nonew == 1) goto noinsert1; 1752 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1753 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1754 N = nrow++ - 1; high++; 1755 /* shift up all the later entries in this row */ 1756 for (ii=N; ii>=i; ii--) { 1757 rp[ii+1] = rp[ii]; 1758 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1759 } 1760 if (N>=i) { 1761 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1762 } 1763 rp[i] = bcol; 1764 ap[bs2*i + bs*cidx + ridx] = value; 1765 a->nz++; 1766 noinsert1:; 1767 low = i; 1768 } 1769 ailen[brow] = nrow; 1770 } 1771 A->same_nonzero = PETSC_FALSE; 1772 PetscFunctionReturn(0); 1773 } 1774 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1778 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1779 { 1780 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1781 Mat outA; 1782 PetscErrorCode ierr; 1783 PetscTruth row_identity,col_identity; 1784 1785 PetscFunctionBegin; 1786 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1787 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1788 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1789 if (!row_identity || !col_identity) { 1790 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1791 } 1792 1793 outA = inA; 1794 inA->factor = FACTOR_LU; 1795 1796 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1797 1798 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1799 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1800 a->row = row; 1801 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1802 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1803 a->col = col; 1804 1805 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1806 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1807 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1808 1809 /* 1810 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1811 for ILU(0) factorization with natural ordering 1812 */ 1813 if (inA->rmap.bs < 8) { 1814 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1815 } else { 1816 if (!a->solve_work) { 1817 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1818 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1819 } 1820 } 1821 1822 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1823 1824 PetscFunctionReturn(0); 1825 } 1826 1827 EXTERN_C_BEGIN 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1830 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1831 { 1832 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1833 PetscInt i,nz,nbs; 1834 1835 PetscFunctionBegin; 1836 nz = baij->maxnz/baij->bs2; 1837 nbs = baij->nbs; 1838 for (i=0; i<nz; i++) { 1839 baij->j[i] = indices[i]; 1840 } 1841 baij->nz = nz; 1842 for (i=0; i<nbs; i++) { 1843 baij->ilen[i] = baij->imax[i]; 1844 } 1845 1846 PetscFunctionReturn(0); 1847 } 1848 EXTERN_C_END 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1852 /*@ 1853 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1854 in the matrix. 1855 1856 Input Parameters: 1857 + mat - the SeqBAIJ matrix 1858 - indices - the column indices 1859 1860 Level: advanced 1861 1862 Notes: 1863 This can be called if you have precomputed the nonzero structure of the 1864 matrix and want to provide it to the matrix object to improve the performance 1865 of the MatSetValues() operation. 1866 1867 You MUST have set the correct numbers of nonzeros per row in the call to 1868 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1869 1870 MUST be called before any calls to MatSetValues(); 1871 1872 @*/ 1873 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1874 { 1875 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1876 1877 PetscFunctionBegin; 1878 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1879 PetscValidPointer(indices,2); 1880 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1881 if (f) { 1882 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1883 } else { 1884 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1885 } 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1891 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1892 { 1893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1894 PetscErrorCode ierr; 1895 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1896 PetscReal atmp; 1897 PetscScalar *x,zero = 0.0; 1898 MatScalar *aa; 1899 PetscInt ncols,brow,krow,kcol; 1900 1901 PetscFunctionBegin; 1902 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1903 bs = A->rmap.bs; 1904 aa = a->a; 1905 ai = a->i; 1906 aj = a->j; 1907 mbs = a->mbs; 1908 1909 ierr = VecSet(v,zero);CHKERRQ(ierr); 1910 if (idx) { 1911 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1912 } 1913 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1914 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1915 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1916 for (i=0; i<mbs; i++) { 1917 ncols = ai[1] - ai[0]; ai++; 1918 brow = bs*i; 1919 for (j=0; j<ncols; j++){ 1920 for (kcol=0; kcol<bs; kcol++){ 1921 for (krow=0; krow<bs; krow++){ 1922 atmp = PetscAbsScalar(*aa);aa++; 1923 row = brow + krow; /* row index */ 1924 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1925 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1926 } 1927 } 1928 aj++; 1929 } 1930 } 1931 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatCopy_SeqBAIJ" 1937 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1938 { 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 /* If the two matrices have the same copy implementation, use fast copy. */ 1943 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1944 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1945 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1946 1947 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1948 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1949 } 1950 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1951 } else { 1952 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1953 } 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1959 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1960 { 1961 PetscErrorCode ierr; 1962 1963 PetscFunctionBegin; 1964 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1970 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1971 { 1972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1973 PetscFunctionBegin; 1974 *array = a->a; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1980 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1981 { 1982 PetscFunctionBegin; 1983 PetscFunctionReturn(0); 1984 } 1985 1986 #include "petscblaslapack.h" 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1989 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1990 { 1991 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1992 PetscErrorCode ierr; 1993 PetscInt i,bs=Y->rmap.bs,j,bs2; 1994 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1995 1996 PetscFunctionBegin; 1997 if (str == SAME_NONZERO_PATTERN) { 1998 PetscScalar alpha = a; 1999 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2000 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2001 if (y->xtoy && y->XtoY != X) { 2002 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2003 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2004 } 2005 if (!y->xtoy) { /* get xtoy */ 2006 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2007 y->XtoY = X; 2008 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2009 } 2010 bs2 = bs*bs; 2011 for (i=0; i<x->nz; i++) { 2012 j = 0; 2013 while (j < bs2){ 2014 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2015 j++; 2016 } 2017 } 2018 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2019 } else { 2020 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2021 } 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2027 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2028 { 2029 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2030 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2031 PetscScalar *aa = a->a; 2032 2033 PetscFunctionBegin; 2034 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2040 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2041 { 2042 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2043 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2044 PetscScalar *aa = a->a; 2045 2046 PetscFunctionBegin; 2047 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2048 PetscFunctionReturn(0); 2049 } 2050 2051 2052 /* -------------------------------------------------------------------*/ 2053 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2054 MatGetRow_SeqBAIJ, 2055 MatRestoreRow_SeqBAIJ, 2056 MatMult_SeqBAIJ_N, 2057 /* 4*/ MatMultAdd_SeqBAIJ_N, 2058 MatMultTranspose_SeqBAIJ, 2059 MatMultTransposeAdd_SeqBAIJ, 2060 MatSolve_SeqBAIJ_N, 2061 0, 2062 0, 2063 /*10*/ 0, 2064 MatLUFactor_SeqBAIJ, 2065 0, 2066 0, 2067 MatTranspose_SeqBAIJ, 2068 /*15*/ MatGetInfo_SeqBAIJ, 2069 MatEqual_SeqBAIJ, 2070 MatGetDiagonal_SeqBAIJ, 2071 MatDiagonalScale_SeqBAIJ, 2072 MatNorm_SeqBAIJ, 2073 /*20*/ 0, 2074 MatAssemblyEnd_SeqBAIJ, 2075 0, 2076 MatSetOption_SeqBAIJ, 2077 MatZeroEntries_SeqBAIJ, 2078 /*25*/ MatZeroRows_SeqBAIJ, 2079 MatLUFactorSymbolic_SeqBAIJ, 2080 MatLUFactorNumeric_SeqBAIJ_N, 2081 MatCholeskyFactorSymbolic_SeqBAIJ, 2082 MatCholeskyFactorNumeric_SeqBAIJ_N, 2083 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2084 MatILUFactorSymbolic_SeqBAIJ, 2085 MatICCFactorSymbolic_SeqBAIJ, 2086 MatGetArray_SeqBAIJ, 2087 MatRestoreArray_SeqBAIJ, 2088 /*35*/ MatDuplicate_SeqBAIJ, 2089 0, 2090 0, 2091 MatILUFactor_SeqBAIJ, 2092 0, 2093 /*40*/ MatAXPY_SeqBAIJ, 2094 MatGetSubMatrices_SeqBAIJ, 2095 MatIncreaseOverlap_SeqBAIJ, 2096 MatGetValues_SeqBAIJ, 2097 MatCopy_SeqBAIJ, 2098 /*45*/ 0, 2099 MatScale_SeqBAIJ, 2100 0, 2101 0, 2102 0, 2103 /*50*/ 0, 2104 MatGetRowIJ_SeqBAIJ, 2105 MatRestoreRowIJ_SeqBAIJ, 2106 0, 2107 0, 2108 /*55*/ 0, 2109 0, 2110 0, 2111 0, 2112 MatSetValuesBlocked_SeqBAIJ, 2113 /*60*/ MatGetSubMatrix_SeqBAIJ, 2114 MatDestroy_SeqBAIJ, 2115 MatView_SeqBAIJ, 2116 0, 2117 0, 2118 /*65*/ 0, 2119 0, 2120 0, 2121 0, 2122 0, 2123 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2124 MatConvert_Basic, 2125 0, 2126 0, 2127 0, 2128 /*75*/ 0, 2129 0, 2130 0, 2131 0, 2132 0, 2133 /*80*/ 0, 2134 0, 2135 0, 2136 0, 2137 MatLoad_SeqBAIJ, 2138 /*85*/ 0, 2139 0, 2140 0, 2141 0, 2142 0, 2143 /*90*/ 0, 2144 0, 2145 0, 2146 0, 2147 0, 2148 /*95*/ 0, 2149 0, 2150 0, 2151 0, 2152 0, 2153 /*100*/0, 2154 0, 2155 0, 2156 0, 2157 0, 2158 /*105*/0, 2159 MatRealPart_SeqBAIJ, 2160 MatImaginaryPart_SeqBAIJ 2161 }; 2162 2163 EXTERN_C_BEGIN 2164 #undef __FUNCT__ 2165 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2166 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2167 { 2168 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2169 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 if (aij->nonew != 1) { 2174 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2175 } 2176 2177 /* allocate space for values if not already there */ 2178 if (!aij->saved_values) { 2179 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2180 } 2181 2182 /* copy values over */ 2183 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 EXTERN_C_END 2187 2188 EXTERN_C_BEGIN 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2191 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2192 { 2193 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2194 PetscErrorCode ierr; 2195 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2196 2197 PetscFunctionBegin; 2198 if (aij->nonew != 1) { 2199 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2200 } 2201 if (!aij->saved_values) { 2202 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2203 } 2204 2205 /* copy values over */ 2206 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2207 PetscFunctionReturn(0); 2208 } 2209 EXTERN_C_END 2210 2211 EXTERN_C_BEGIN 2212 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2213 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2214 EXTERN_C_END 2215 2216 EXTERN_C_BEGIN 2217 #undef __FUNCT__ 2218 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2219 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2220 { 2221 Mat_SeqBAIJ *b; 2222 PetscErrorCode ierr; 2223 PetscInt i,mbs,nbs,bs2,newbs = bs; 2224 PetscTruth flg,skipallocation = PETSC_FALSE; 2225 2226 PetscFunctionBegin; 2227 2228 if (nz == MAT_SKIP_ALLOCATION) { 2229 skipallocation = PETSC_TRUE; 2230 nz = 0; 2231 } 2232 2233 ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2234 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2235 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2236 2237 if (nnz && newbs != bs) { 2238 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2239 } 2240 bs = newbs; 2241 2242 B->rmap.bs = B->cmap.bs = bs; 2243 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2244 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2245 2246 B->preallocated = PETSC_TRUE; 2247 2248 mbs = B->rmap.n/bs; 2249 nbs = B->cmap.n/bs; 2250 bs2 = bs*bs; 2251 2252 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2253 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2254 } 2255 2256 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2257 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2258 if (nnz) { 2259 for (i=0; i<mbs; i++) { 2260 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2261 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2262 } 2263 } 2264 2265 b = (Mat_SeqBAIJ*)B->data; 2266 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2267 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2268 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2269 2270 B->ops->solve = MatSolve_SeqBAIJ_Update; 2271 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2272 if (!flg) { 2273 switch (bs) { 2274 case 1: 2275 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2276 B->ops->mult = MatMult_SeqBAIJ_1; 2277 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2278 break; 2279 case 2: 2280 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2281 B->ops->mult = MatMult_SeqBAIJ_2; 2282 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2283 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2284 break; 2285 case 3: 2286 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2287 B->ops->mult = MatMult_SeqBAIJ_3; 2288 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2289 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2290 break; 2291 case 4: 2292 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2293 B->ops->mult = MatMult_SeqBAIJ_4; 2294 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2295 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2296 break; 2297 case 5: 2298 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2299 B->ops->mult = MatMult_SeqBAIJ_5; 2300 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2301 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2302 break; 2303 case 6: 2304 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2305 B->ops->mult = MatMult_SeqBAIJ_6; 2306 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2307 break; 2308 case 7: 2309 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2310 B->ops->mult = MatMult_SeqBAIJ_7; 2311 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2312 break; 2313 default: 2314 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2315 B->ops->mult = MatMult_SeqBAIJ_N; 2316 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2317 break; 2318 } 2319 } 2320 B->rmap.bs = bs; 2321 b->mbs = mbs; 2322 b->nbs = nbs; 2323 if (!skipallocation) { 2324 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2325 /* b->ilen will count nonzeros in each block row so far. */ 2326 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2327 if (!nnz) { 2328 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2329 else if (nz <= 0) nz = 1; 2330 for (i=0; i<mbs; i++) b->imax[i] = nz; 2331 nz = nz*mbs; 2332 } else { 2333 nz = 0; 2334 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2335 } 2336 2337 /* allocate the matrix space */ 2338 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2339 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2340 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2341 b->singlemalloc = PETSC_TRUE; 2342 2343 b->i[0] = 0; 2344 for (i=1; i<mbs+1; i++) { 2345 b->i[i] = b->i[i-1] + b->imax[i-1]; 2346 } 2347 b->free_a = PETSC_TRUE; 2348 b->free_ij = PETSC_TRUE; 2349 } else { 2350 b->free_a = PETSC_FALSE; 2351 b->free_ij = PETSC_FALSE; 2352 } 2353 2354 B->rmap.bs = bs; 2355 b->bs2 = bs2; 2356 b->mbs = mbs; 2357 b->nz = 0; 2358 b->maxnz = nz*bs2; 2359 B->info.nz_unneeded = (PetscReal)b->maxnz; 2360 PetscFunctionReturn(0); 2361 } 2362 EXTERN_C_END 2363 2364 /*MC 2365 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2366 block sparse compressed row format. 2367 2368 Options Database Keys: 2369 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2370 2371 Level: beginner 2372 2373 .seealso: MatCreateSeqBAIJ() 2374 M*/ 2375 2376 EXTERN_C_BEGIN 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatCreate_SeqBAIJ" 2379 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2380 { 2381 PetscErrorCode ierr; 2382 PetscMPIInt size; 2383 Mat_SeqBAIJ *b; 2384 2385 PetscFunctionBegin; 2386 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2387 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2388 2389 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2390 B->data = (void*)b; 2391 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2392 B->factor = 0; 2393 B->mapping = 0; 2394 b->row = 0; 2395 b->col = 0; 2396 b->icol = 0; 2397 b->reallocs = 0; 2398 b->saved_values = 0; 2399 #if defined(PETSC_USE_MAT_SINGLE) 2400 b->setvalueslen = 0; 2401 b->setvaluescopy = PETSC_NULL; 2402 #endif 2403 2404 b->sorted = PETSC_FALSE; 2405 b->roworiented = PETSC_TRUE; 2406 b->nonew = 0; 2407 b->diag = 0; 2408 b->solve_work = 0; 2409 b->mult_work = 0; 2410 B->spptr = 0; 2411 B->info.nz_unneeded = (PetscReal)b->maxnz; 2412 b->keepzeroedrows = PETSC_FALSE; 2413 b->xtoy = 0; 2414 b->XtoY = 0; 2415 b->compressedrow.use = PETSC_FALSE; 2416 b->compressedrow.nrows = 0; 2417 b->compressedrow.i = PETSC_NULL; 2418 b->compressedrow.rindex = PETSC_NULL; 2419 b->compressedrow.checked = PETSC_FALSE; 2420 B->same_nonzero = PETSC_FALSE; 2421 2422 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2423 "MatInvertBlockDiagonal_SeqBAIJ", 2424 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2425 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2426 "MatStoreValues_SeqBAIJ", 2427 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2428 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2429 "MatRetrieveValues_SeqBAIJ", 2430 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2431 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2432 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2433 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2434 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2435 "MatConvert_SeqBAIJ_SeqAIJ", 2436 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2437 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2438 "MatConvert_SeqBAIJ_SeqSBAIJ", 2439 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2440 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2441 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2442 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2443 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 EXTERN_C_END 2447 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2450 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2451 { 2452 Mat C; 2453 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2454 PetscErrorCode ierr; 2455 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2456 2457 PetscFunctionBegin; 2458 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2459 2460 *B = 0; 2461 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2462 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2463 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2464 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2465 c = (Mat_SeqBAIJ*)C->data; 2466 2467 C->rmap.N = A->rmap.N; 2468 C->cmap.N = A->cmap.N; 2469 C->rmap.bs = A->rmap.bs; 2470 c->bs2 = a->bs2; 2471 c->mbs = a->mbs; 2472 c->nbs = a->nbs; 2473 2474 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2475 for (i=0; i<mbs; i++) { 2476 c->imax[i] = a->imax[i]; 2477 c->ilen[i] = a->ilen[i]; 2478 } 2479 2480 /* allocate the matrix space */ 2481 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2482 c->singlemalloc = PETSC_TRUE; 2483 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2484 if (mbs > 0) { 2485 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2486 if (cpvalues == MAT_COPY_VALUES) { 2487 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2488 } else { 2489 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2490 } 2491 } 2492 c->sorted = a->sorted; 2493 c->roworiented = a->roworiented; 2494 c->nonew = a->nonew; 2495 2496 if (a->diag) { 2497 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2498 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2499 for (i=0; i<mbs; i++) { 2500 c->diag[i] = a->diag[i]; 2501 } 2502 } else c->diag = 0; 2503 c->nz = a->nz; 2504 c->maxnz = a->maxnz; 2505 c->solve_work = 0; 2506 c->mult_work = 0; 2507 c->free_a = PETSC_TRUE; 2508 c->free_ij = PETSC_TRUE; 2509 C->preallocated = PETSC_TRUE; 2510 C->assembled = PETSC_TRUE; 2511 2512 c->compressedrow.use = a->compressedrow.use; 2513 c->compressedrow.nrows = a->compressedrow.nrows; 2514 c->compressedrow.checked = a->compressedrow.checked; 2515 if ( a->compressedrow.checked && a->compressedrow.use){ 2516 i = a->compressedrow.nrows; 2517 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2518 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2519 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2520 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2521 } else { 2522 c->compressedrow.use = PETSC_FALSE; 2523 c->compressedrow.i = PETSC_NULL; 2524 c->compressedrow.rindex = PETSC_NULL; 2525 } 2526 C->same_nonzero = A->same_nonzero; 2527 *B = C; 2528 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2529 PetscFunctionReturn(0); 2530 } 2531 2532 #undef __FUNCT__ 2533 #define __FUNCT__ "MatLoad_SeqBAIJ" 2534 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2535 { 2536 Mat_SeqBAIJ *a; 2537 Mat B; 2538 PetscErrorCode ierr; 2539 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2540 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2541 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2542 PetscInt *masked,nmask,tmp,bs2,ishift; 2543 PetscMPIInt size; 2544 int fd; 2545 PetscScalar *aa; 2546 MPI_Comm comm = ((PetscObject)viewer)->comm; 2547 2548 PetscFunctionBegin; 2549 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2550 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2551 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2552 bs2 = bs*bs; 2553 2554 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2555 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2556 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2557 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2558 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2559 M = header[1]; N = header[2]; nz = header[3]; 2560 2561 if (header[3] < 0) { 2562 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2563 } 2564 2565 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2566 2567 /* 2568 This code adds extra rows to make sure the number of rows is 2569 divisible by the blocksize 2570 */ 2571 mbs = M/bs; 2572 extra_rows = bs - M + bs*(mbs); 2573 if (extra_rows == bs) extra_rows = 0; 2574 else mbs++; 2575 if (extra_rows) { 2576 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2577 } 2578 2579 /* read in row lengths */ 2580 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2581 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2582 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2583 2584 /* read in column indices */ 2585 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2586 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2587 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2588 2589 /* loop over row lengths determining block row lengths */ 2590 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2591 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2592 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2593 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2594 masked = mask + mbs; 2595 rowcount = 0; nzcount = 0; 2596 for (i=0; i<mbs; i++) { 2597 nmask = 0; 2598 for (j=0; j<bs; j++) { 2599 kmax = rowlengths[rowcount]; 2600 for (k=0; k<kmax; k++) { 2601 tmp = jj[nzcount++]/bs; 2602 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2603 } 2604 rowcount++; 2605 } 2606 browlengths[i] += nmask; 2607 /* zero out the mask elements we set */ 2608 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2609 } 2610 2611 /* create our matrix */ 2612 ierr = MatCreate(comm,&B); 2613 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2614 ierr = MatSetType(B,type);CHKERRQ(ierr); 2615 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2616 a = (Mat_SeqBAIJ*)B->data; 2617 2618 /* set matrix "i" values */ 2619 a->i[0] = 0; 2620 for (i=1; i<= mbs; i++) { 2621 a->i[i] = a->i[i-1] + browlengths[i-1]; 2622 a->ilen[i-1] = browlengths[i-1]; 2623 } 2624 a->nz = 0; 2625 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2626 2627 /* read in nonzero values */ 2628 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2629 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2630 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2631 2632 /* set "a" and "j" values into matrix */ 2633 nzcount = 0; jcount = 0; 2634 for (i=0; i<mbs; i++) { 2635 nzcountb = nzcount; 2636 nmask = 0; 2637 for (j=0; j<bs; j++) { 2638 kmax = rowlengths[i*bs+j]; 2639 for (k=0; k<kmax; k++) { 2640 tmp = jj[nzcount++]/bs; 2641 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2642 } 2643 } 2644 /* sort the masked values */ 2645 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2646 2647 /* set "j" values into matrix */ 2648 maskcount = 1; 2649 for (j=0; j<nmask; j++) { 2650 a->j[jcount++] = masked[j]; 2651 mask[masked[j]] = maskcount++; 2652 } 2653 /* set "a" values into matrix */ 2654 ishift = bs2*a->i[i]; 2655 for (j=0; j<bs; j++) { 2656 kmax = rowlengths[i*bs+j]; 2657 for (k=0; k<kmax; k++) { 2658 tmp = jj[nzcountb]/bs ; 2659 block = mask[tmp] - 1; 2660 point = jj[nzcountb] - bs*tmp; 2661 idx = ishift + bs2*block + j + bs*point; 2662 a->a[idx] = (MatScalar)aa[nzcountb++]; 2663 } 2664 } 2665 /* zero out the mask elements we set */ 2666 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2667 } 2668 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2669 2670 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2671 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2672 ierr = PetscFree(aa);CHKERRQ(ierr); 2673 ierr = PetscFree(jj);CHKERRQ(ierr); 2674 ierr = PetscFree(mask);CHKERRQ(ierr); 2675 2676 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2677 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2678 ierr = MatView_Private(B);CHKERRQ(ierr); 2679 2680 *A = B; 2681 PetscFunctionReturn(0); 2682 } 2683 2684 #undef __FUNCT__ 2685 #define __FUNCT__ "MatCreateSeqBAIJ" 2686 /*@C 2687 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2688 compressed row) format. For good matrix assembly performance the 2689 user should preallocate the matrix storage by setting the parameter nz 2690 (or the array nnz). By setting these parameters accurately, performance 2691 during matrix assembly can be increased by more than a factor of 50. 2692 2693 Collective on MPI_Comm 2694 2695 Input Parameters: 2696 + comm - MPI communicator, set to PETSC_COMM_SELF 2697 . bs - size of block 2698 . m - number of rows 2699 . n - number of columns 2700 . nz - number of nonzero blocks per block row (same for all rows) 2701 - nnz - array containing the number of nonzero blocks in the various block rows 2702 (possibly different for each block row) or PETSC_NULL 2703 2704 Output Parameter: 2705 . A - the matrix 2706 2707 Options Database Keys: 2708 . -mat_no_unroll - uses code that does not unroll the loops in the 2709 block calculations (much slower) 2710 . -mat_block_size - size of the blocks to use 2711 2712 Level: intermediate 2713 2714 Notes: 2715 The number of rows and columns must be divisible by blocksize. 2716 2717 If the nnz parameter is given then the nz parameter is ignored 2718 2719 A nonzero block is any block that as 1 or more nonzeros in it 2720 2721 The block AIJ format is fully compatible with standard Fortran 77 2722 storage. That is, the stored row and column indices can begin at 2723 either one (as in Fortran) or zero. See the users' manual for details. 2724 2725 Specify the preallocated storage with either nz or nnz (not both). 2726 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2727 allocation. For additional details, see the users manual chapter on 2728 matrices. 2729 2730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2731 @*/ 2732 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2733 { 2734 PetscErrorCode ierr; 2735 2736 PetscFunctionBegin; 2737 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2738 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2739 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2740 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2746 /*@C 2747 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2748 per row in the matrix. For good matrix assembly performance the 2749 user should preallocate the matrix storage by setting the parameter nz 2750 (or the array nnz). By setting these parameters accurately, performance 2751 during matrix assembly can be increased by more than a factor of 50. 2752 2753 Collective on MPI_Comm 2754 2755 Input Parameters: 2756 + A - the matrix 2757 . bs - size of block 2758 . nz - number of block nonzeros per block row (same for all rows) 2759 - nnz - array containing the number of block nonzeros in the various block rows 2760 (possibly different for each block row) or PETSC_NULL 2761 2762 Options Database Keys: 2763 . -mat_no_unroll - uses code that does not unroll the loops in the 2764 block calculations (much slower) 2765 . -mat_block_size - size of the blocks to use 2766 2767 Level: intermediate 2768 2769 Notes: 2770 If the nnz parameter is given then the nz parameter is ignored 2771 2772 The block AIJ format is fully compatible with standard Fortran 77 2773 storage. That is, the stored row and column indices can begin at 2774 either one (as in Fortran) or zero. See the users' manual for details. 2775 2776 Specify the preallocated storage with either nz or nnz (not both). 2777 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2778 allocation. For additional details, see the users manual chapter on 2779 matrices. 2780 2781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2782 @*/ 2783 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2784 { 2785 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2786 2787 PetscFunctionBegin; 2788 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2789 if (f) { 2790 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2791 } 2792 PetscFunctionReturn(0); 2793 } 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2797 /*@ 2798 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2799 (upper triangular entries in CSR format) provided by the user. 2800 2801 Collective on MPI_Comm 2802 2803 Input Parameters: 2804 + comm - must be an MPI communicator of size 1 2805 . bs - size of block 2806 . m - number of rows 2807 . n - number of columns 2808 . i - row indices 2809 . j - column indices 2810 - a - matrix values 2811 2812 Output Parameter: 2813 . mat - the matrix 2814 2815 Level: intermediate 2816 2817 Notes: 2818 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2819 once the matrix is destroyed 2820 2821 You cannot set new nonzero locations into this matrix, that will generate an error. 2822 2823 The i and j indices are 0 based 2824 2825 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2826 2827 @*/ 2828 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2829 { 2830 PetscErrorCode ierr; 2831 PetscInt ii; 2832 Mat_SeqBAIJ *baij; 2833 2834 PetscFunctionBegin; 2835 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2836 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2837 2838 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2839 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2840 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2841 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2842 baij = (Mat_SeqBAIJ*)(*mat)->data; 2843 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2844 2845 baij->i = i; 2846 baij->j = j; 2847 baij->a = a; 2848 baij->singlemalloc = PETSC_FALSE; 2849 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2850 baij->free_a = PETSC_FALSE; 2851 baij->free_ij = PETSC_FALSE; 2852 2853 for (ii=0; ii<m; ii++) { 2854 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2855 #if defined(PETSC_USE_DEBUG) 2856 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2857 #endif 2858 } 2859 #if defined(PETSC_USE_DEBUG) 2860 for (ii=0; ii<baij->i[m]; ii++) { 2861 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2862 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2863 } 2864 #endif 2865 2866 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2868 PetscFunctionReturn(0); 2869 } 2870