1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 18 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 19 20 PetscFunctionBegin; 21 if (a->idiagvalid) PetscFunctionReturn(0); 22 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 23 diag_offset = a->diag; 24 if (!a->idiag) { 25 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 26 } 27 diag = a->idiag; 28 mdiag = a->idiag+bs*bs*mbs; 29 /* factor and invert each block */ 30 switch (bs){ 31 case 2: 32 for (i=0; i<mbs; i++) { 33 odiag = v + 4*diag_offset[i]; 34 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 35 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 36 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 37 diag += 4; 38 mdiag += 4; 39 } 40 break; 41 case 3: 42 for (i=0; i<mbs; i++) { 43 odiag = v + 9*diag_offset[i]; 44 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 45 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 46 diag[8] = odiag[8]; 47 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 48 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 49 mdiag[8] = odiag[8]; 50 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 51 diag += 9; 52 mdiag += 9; 53 } 54 break; 55 case 4: 56 for (i=0; i<mbs; i++) { 57 odiag = v + 16*diag_offset[i]; 58 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 59 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 61 diag += 16; 62 mdiag += 16; 63 } 64 break; 65 case 5: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 25*diag_offset[i]; 68 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 71 diag += 25; 72 mdiag += 25; 73 } 74 break; 75 default: 76 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 77 } 78 a->idiagvalid = PETSC_TRUE; 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 85 { 86 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 87 PetscScalar *x,x1,x2,s1,s2; 88 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 89 PetscErrorCode ierr; 90 PetscInt m = a->mbs,i,i2,nz,idx; 91 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 PetscErrorCode ierr; 192 PetscInt m = a->mbs,i,i2,nz,idx; 193 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 194 195 PetscFunctionBegin; 196 its = its*lits; 197 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 198 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 199 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 200 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 201 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 202 203 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 204 205 diag = a->diag; 206 idiag = a->idiag; 207 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209 210 if (flag & SOR_ZERO_INITIAL_GUESS) { 211 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 212 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 213 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 214 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 215 i2 = 3; 216 idiag += 9; 217 for (i=1; i<m; i++) { 218 v = aa + 9*ai[i]; 219 vi = aj + ai[i]; 220 nz = diag[i] - ai[i]; 221 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 222 while (nz--) { 223 idx = 3*(*vi++); 224 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 225 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 226 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 227 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 228 v += 9; 229 } 230 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 231 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 232 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 233 idiag += 9; 234 i2 += 3; 235 } 236 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 237 PetscLogFlops(9*(a->nz)); 238 } 239 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 240 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 241 i2 = 0; 242 mdiag = a->idiag+9*a->mbs; 243 for (i=0; i<m; i++) { 244 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 245 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 246 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 247 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 248 mdiag += 9; 249 i2 += 3; 250 } 251 PetscLogFlops(15*m); 252 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 253 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 256 idiag = a->idiag+9*a->mbs - 9; 257 i2 = 3*m - 3; 258 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 259 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 260 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 261 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 262 idiag -= 9; 263 i2 -= 3; 264 for (i=m-2; i>=0; i--) { 265 v = aa + 9*(diag[i]+1); 266 vi = aj + diag[i] + 1; 267 nz = ai[i+1] - diag[i] - 1; 268 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 269 while (nz--) { 270 idx = 3*(*vi++); 271 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 272 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 273 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 274 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 275 v += 9; 276 } 277 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 278 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 279 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 280 idiag -= 9; 281 i2 -= 3; 282 } 283 PetscLogFlops(9*(a->nz)); 284 } 285 } else { 286 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 287 } 288 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 289 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 296 { 297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 298 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 299 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 300 PetscErrorCode ierr; 301 PetscInt m = a->mbs,i,i2,nz,idx; 302 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 303 304 PetscFunctionBegin; 305 its = its*lits; 306 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 307 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 308 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 309 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 310 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 311 312 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 313 314 diag = a->diag; 315 idiag = a->idiag; 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 318 319 if (flag & SOR_ZERO_INITIAL_GUESS) { 320 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 321 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 322 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 323 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 324 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 325 i2 = 4; 326 idiag += 16; 327 for (i=1; i<m; i++) { 328 v = aa + 16*ai[i]; 329 vi = aj + ai[i]; 330 nz = diag[i] - ai[i]; 331 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 332 while (nz--) { 333 idx = 4*(*vi++); 334 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 335 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 336 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 337 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 338 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 339 v += 16; 340 } 341 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 342 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 343 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 344 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 345 idiag += 16; 346 i2 += 4; 347 } 348 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 349 PetscLogFlops(16*(a->nz)); 350 } 351 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 352 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 353 i2 = 0; 354 mdiag = a->idiag+16*a->mbs; 355 for (i=0; i<m; i++) { 356 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 357 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 358 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 359 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 360 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 361 mdiag += 16; 362 i2 += 4; 363 } 364 PetscLogFlops(28*m); 365 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 366 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 367 } 368 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 369 idiag = a->idiag+16*a->mbs - 16; 370 i2 = 4*m - 4; 371 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 372 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 373 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 374 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 375 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 376 idiag -= 16; 377 i2 -= 4; 378 for (i=m-2; i>=0; i--) { 379 v = aa + 16*(diag[i]+1); 380 vi = aj + diag[i] + 1; 381 nz = ai[i+1] - diag[i] - 1; 382 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 383 while (nz--) { 384 idx = 4*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 386 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 387 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 388 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 389 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 390 v += 16; 391 } 392 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 393 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 394 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 395 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 396 idiag -= 16; 397 i2 -= 4; 398 } 399 PetscLogFlops(16*(a->nz)); 400 } 401 } else { 402 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 403 } 404 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 412 { 413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 414 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 415 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 416 PetscErrorCode ierr; 417 PetscInt m = a->mbs,i,i2,nz,idx; 418 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 419 420 PetscFunctionBegin; 421 its = its*lits; 422 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 423 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 424 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 425 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 426 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 427 428 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 429 430 diag = a->diag; 431 idiag = a->idiag; 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 434 435 if (flag & SOR_ZERO_INITIAL_GUESS) { 436 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 437 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 438 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 439 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 440 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 441 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 442 i2 = 5; 443 idiag += 25; 444 for (i=1; i<m; i++) { 445 v = aa + 25*ai[i]; 446 vi = aj + ai[i]; 447 nz = diag[i] - ai[i]; 448 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 449 while (nz--) { 450 idx = 5*(*vi++); 451 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 452 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 453 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 454 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 455 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 456 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 457 v += 25; 458 } 459 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 460 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 461 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 462 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 463 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 464 idiag += 25; 465 i2 += 5; 466 } 467 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 468 PetscLogFlops(25*(a->nz)); 469 } 470 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 471 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 472 i2 = 0; 473 mdiag = a->idiag+25*a->mbs; 474 for (i=0; i<m; i++) { 475 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 476 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 477 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 478 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 479 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 480 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 481 mdiag += 25; 482 i2 += 5; 483 } 484 PetscLogFlops(45*m); 485 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 486 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 487 } 488 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 489 idiag = a->idiag+25*a->mbs - 25; 490 i2 = 5*m - 5; 491 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 492 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 493 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 494 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 495 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 496 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 497 idiag -= 25; 498 i2 -= 5; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 25*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 504 while (nz--) { 505 idx = 5*(*vi++); 506 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 507 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 508 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 509 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 510 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 511 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 512 v += 25; 513 } 514 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 515 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 516 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 517 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 518 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 519 idiag -= 25; 520 i2 -= 5; 521 } 522 PetscLogFlops(25*(a->nz)); 523 } 524 } else { 525 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 526 } 527 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 528 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 529 PetscFunctionReturn(0); 530 } 531 532 /* 533 Special version for Fun3d sequential benchmark 534 */ 535 #if defined(PETSC_HAVE_FORTRAN_CAPS) 536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 538 #define matsetvaluesblocked4_ matsetvaluesblocked4 539 #endif 540 541 EXTERN_C_BEGIN 542 #undef __FUNCT__ 543 #define __FUNCT__ "matsetvaluesblocked4_" 544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 545 { 546 Mat A = *AA; 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 549 PetscInt *ai=a->i,*ailen=a->ilen; 550 PetscInt *aj=a->j,stepval; 551 const PetscScalar *value = v; 552 MatScalar *ap,*aa = a->a,*bap; 553 554 PetscFunctionBegin; 555 stepval = (n-1)*4; 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 rp = aj + ai[row]; 559 ap = aa + 16*ai[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 col = in[l]; 564 value = v + k*(stepval+4)*4 + l*4; 565 low = 0; high = nrow; 566 while (high-low > 7) { 567 t = (low+high)/2; 568 if (rp[t] > col) high = t; 569 else low = t; 570 } 571 for (i=low; i<high; i++) { 572 if (rp[i] > col) break; 573 if (rp[i] == col) { 574 bap = ap + 16*i; 575 for (ii=0; ii<4; ii++,value+=stepval) { 576 for (jj=ii; jj<16; jj+=4) { 577 bap[jj] += *value++; 578 } 579 } 580 goto noinsert2; 581 } 582 } 583 N = nrow++ - 1; 584 /* shift up all the later entries in this row */ 585 for (ii=N; ii>=i; ii--) { 586 rp[ii+1] = rp[ii]; 587 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 588 } 589 if (N >= i) { 590 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 591 } 592 rp[i] = col; 593 bap = ap + 16*i; 594 for (ii=0; ii<4; ii++,value+=stepval) { 595 for (jj=ii; jj<16; jj+=4) { 596 bap[jj] = *value++; 597 } 598 } 599 noinsert2:; 600 low = i; 601 } 602 ailen[row] = nrow; 603 } 604 } 605 EXTERN_C_END 606 607 #if defined(PETSC_HAVE_FORTRAN_CAPS) 608 #define matsetvalues4_ MATSETVALUES4 609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 610 #define matsetvalues4_ matsetvalues4 611 #endif 612 613 EXTERN_C_BEGIN 614 #undef __FUNCT__ 615 #define __FUNCT__ "MatSetValues4_" 616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 617 { 618 Mat A = *AA; 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 620 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 621 PetscInt *ai=a->i,*ailen=a->ilen; 622 PetscInt *aj=a->j,brow,bcol; 623 PetscInt ridx,cidx; 624 MatScalar *ap,value,*aa=a->a,*bap; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over added rows */ 628 row = im[k]; brow = row/4; 629 rp = aj + ai[brow]; 630 ap = aa + 16*ai[brow]; 631 nrow = ailen[brow]; 632 low = 0; 633 for (l=0; l<n; l++) { /* loop over added columns */ 634 col = in[l]; bcol = col/4; 635 ridx = row % 4; cidx = col % 4; 636 value = v[l + k*n]; 637 low = 0; high = nrow; 638 while (high-low > 7) { 639 t = (low+high)/2; 640 if (rp[t] > bcol) high = t; 641 else low = t; 642 } 643 for (i=low; i<high; i++) { 644 if (rp[i] > bcol) break; 645 if (rp[i] == bcol) { 646 bap = ap + 16*i + 4*cidx + ridx; 647 *bap += value; 648 goto noinsert1; 649 } 650 } 651 N = nrow++ - 1; 652 /* shift up all the later entries in this row */ 653 for (ii=N; ii>=i; ii--) { 654 rp[ii+1] = rp[ii]; 655 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 656 } 657 if (N>=i) { 658 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 659 } 660 rp[i] = bcol; 661 ap[16*i + 4*cidx + ridx] = value; 662 noinsert1:; 663 low = i; 664 } 665 ailen[brow] = nrow; 666 } 667 } 668 EXTERN_C_END 669 670 /* UGLY, ugly, ugly 671 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 672 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 673 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 674 converts the entries into single precision and then calls ..._MatScalar() to put them 675 into the single precision data structures. 676 */ 677 #if defined(PETSC_USE_MAT_SINGLE) 678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 679 #else 680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 681 #endif 682 683 #define CHUNKSIZE 10 684 685 /* 686 Checks for missing diagonals 687 */ 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt *diag,*jj = a->j,i; 695 696 PetscFunctionBegin; 697 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 698 diag = a->diag; 699 for (i=0; i<a->mbs; i++) { 700 if (jj[diag[i]] != i) { 701 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 702 } 703 } 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 710 { 711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscInt i,j,*diag,m = a->mbs; 714 715 PetscFunctionBegin; 716 if (a->diag) PetscFunctionReturn(0); 717 718 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 719 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 720 for (i=0; i<m; i++) { 721 diag[i] = a->i[i+1]; 722 for (j=a->i[i]; j<a->i[i+1]; j++) { 723 if (a->j[j] == i) { 724 diag[i] = j; 725 break; 726 } 727 } 728 } 729 a->diag = diag; 730 PetscFunctionReturn(0); 731 } 732 733 734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 739 { 740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 741 PetscErrorCode ierr; 742 PetscInt n = a->mbs,i; 743 744 PetscFunctionBegin; 745 *nn = n; 746 if (!ia) PetscFunctionReturn(0); 747 if (symmetric) { 748 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 749 } else if (oshift == 1) { 750 /* temporarily add 1 to i and j indices */ 751 PetscInt nz = a->i[n]; 752 for (i=0; i<nz; i++) a->j[i]++; 753 for (i=0; i<n+1; i++) a->i[i]++; 754 *ia = a->i; *ja = a->j; 755 } else { 756 *ia = a->i; *ja = a->j; 757 } 758 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 765 { 766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 767 PetscErrorCode ierr; 768 PetscInt i,n = a->mbs; 769 770 PetscFunctionBegin; 771 if (!ia) PetscFunctionReturn(0); 772 if (symmetric) { 773 ierr = PetscFree(*ia);CHKERRQ(ierr); 774 ierr = PetscFree(*ja);CHKERRQ(ierr); 775 } else if (oshift == 1) { 776 PetscInt nz = a->i[n]-1; 777 for (i=0; i<nz; i++) a->j[i]--; 778 for (i=0; i<n+1; i++) a->i[i]--; 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatDestroy_SeqBAIJ" 785 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 786 { 787 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 #if defined(PETSC_USE_LOG) 792 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 793 #endif 794 ierr = PetscFree(a->a);CHKERRQ(ierr); 795 if (!a->singlemalloc) { 796 ierr = PetscFree(a->i);CHKERRQ(ierr); 797 ierr = PetscFree(a->j);CHKERRQ(ierr); 798 } 799 if (a->row) { 800 ierr = ISDestroy(a->row);CHKERRQ(ierr); 801 } 802 if (a->col) { 803 ierr = ISDestroy(a->col);CHKERRQ(ierr); 804 } 805 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 806 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 807 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 808 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 809 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 810 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 811 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 812 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 813 #if defined(PETSC_USE_MAT_SINGLE) 814 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 815 #endif 816 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 817 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 818 819 ierr = PetscFree(a);CHKERRQ(ierr); 820 821 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 822 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 823 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 824 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 825 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 826 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatSetOption_SeqBAIJ" 832 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 833 { 834 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 835 836 PetscFunctionBegin; 837 switch (op) { 838 case MAT_ROW_ORIENTED: 839 a->roworiented = PETSC_TRUE; 840 break; 841 case MAT_COLUMN_ORIENTED: 842 a->roworiented = PETSC_FALSE; 843 break; 844 case MAT_COLUMNS_SORTED: 845 a->sorted = PETSC_TRUE; 846 break; 847 case MAT_COLUMNS_UNSORTED: 848 a->sorted = PETSC_FALSE; 849 break; 850 case MAT_KEEP_ZEROED_ROWS: 851 a->keepzeroedrows = PETSC_TRUE; 852 break; 853 case MAT_NO_NEW_NONZERO_LOCATIONS: 854 a->nonew = 1; 855 break; 856 case MAT_NEW_NONZERO_LOCATION_ERR: 857 a->nonew = -1; 858 break; 859 case MAT_NEW_NONZERO_ALLOCATION_ERR: 860 a->nonew = -2; 861 break; 862 case MAT_YES_NEW_NONZERO_LOCATIONS: 863 a->nonew = 0; 864 break; 865 case MAT_ROWS_SORTED: 866 case MAT_ROWS_UNSORTED: 867 case MAT_YES_NEW_DIAGONALS: 868 case MAT_IGNORE_OFF_PROC_ENTRIES: 869 case MAT_USE_HASH_TABLE: 870 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 871 break; 872 case MAT_NO_NEW_DIAGONALS: 873 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 874 case MAT_SYMMETRIC: 875 case MAT_STRUCTURALLY_SYMMETRIC: 876 case MAT_NOT_SYMMETRIC: 877 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 878 case MAT_HERMITIAN: 879 case MAT_NOT_HERMITIAN: 880 case MAT_SYMMETRY_ETERNAL: 881 case MAT_NOT_SYMMETRY_ETERNAL: 882 break; 883 default: 884 SETERRQ(PETSC_ERR_SUP,"unknown option"); 885 } 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatGetRow_SeqBAIJ" 891 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 892 { 893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 894 PetscErrorCode ierr; 895 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 896 MatScalar *aa,*aa_i; 897 PetscScalar *v_i; 898 899 PetscFunctionBegin; 900 bs = A->bs; 901 ai = a->i; 902 aj = a->j; 903 aa = a->a; 904 bs2 = a->bs2; 905 906 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 907 908 bn = row/bs; /* Block number */ 909 bp = row % bs; /* Block Position */ 910 M = ai[bn+1] - ai[bn]; 911 *nz = bs*M; 912 913 if (v) { 914 *v = 0; 915 if (*nz) { 916 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 917 for (i=0; i<M; i++) { /* for each block in the block row */ 918 v_i = *v + i*bs; 919 aa_i = aa + bs2*(ai[bn] + i); 920 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 921 } 922 } 923 } 924 925 if (idx) { 926 *idx = 0; 927 if (*nz) { 928 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 929 for (i=0; i<M; i++) { /* for each block in the block row */ 930 idx_i = *idx + i*bs; 931 itmp = bs*aj[ai[bn] + i]; 932 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 933 } 934 } 935 } 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 941 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 942 { 943 PetscErrorCode ierr; 944 945 PetscFunctionBegin; 946 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 947 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatTranspose_SeqBAIJ" 953 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 954 { 955 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 956 Mat C; 957 PetscErrorCode ierr; 958 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 959 PetscInt *rows,*cols,bs2=a->bs2; 960 PetscScalar *array; 961 962 PetscFunctionBegin; 963 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 964 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 965 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 966 967 #if defined(PETSC_USE_MAT_SINGLE) 968 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 969 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 970 #else 971 array = a->a; 972 #endif 973 974 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 975 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 976 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 977 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 978 ierr = PetscFree(col);CHKERRQ(ierr); 979 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 980 cols = rows + bs; 981 for (i=0; i<mbs; i++) { 982 cols[0] = i*bs; 983 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 984 len = ai[i+1] - ai[i]; 985 for (j=0; j<len; j++) { 986 rows[0] = (*aj++)*bs; 987 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 988 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 989 array += bs2; 990 } 991 } 992 ierr = PetscFree(rows);CHKERRQ(ierr); 993 #if defined(PETSC_USE_MAT_SINGLE) 994 ierr = PetscFree(array); 995 #endif 996 997 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 998 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 999 1000 if (B) { 1001 *B = C; 1002 } else { 1003 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1004 } 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1010 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1011 { 1012 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1013 PetscErrorCode ierr; 1014 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1015 int fd; 1016 PetscScalar *aa; 1017 FILE *file; 1018 1019 PetscFunctionBegin; 1020 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1021 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1022 col_lens[0] = MAT_FILE_COOKIE; 1023 1024 col_lens[1] = A->m; 1025 col_lens[2] = A->n; 1026 col_lens[3] = a->nz*bs2; 1027 1028 /* store lengths of each row and write (including header) to file */ 1029 count = 0; 1030 for (i=0; i<a->mbs; i++) { 1031 for (j=0; j<bs; j++) { 1032 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1033 } 1034 } 1035 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1036 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1037 1038 /* store column indices (zero start index) */ 1039 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1040 count = 0; 1041 for (i=0; i<a->mbs; i++) { 1042 for (j=0; j<bs; j++) { 1043 for (k=a->i[i]; k<a->i[i+1]; k++) { 1044 for (l=0; l<bs; l++) { 1045 jj[count++] = bs*a->j[k] + l; 1046 } 1047 } 1048 } 1049 } 1050 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1051 ierr = PetscFree(jj);CHKERRQ(ierr); 1052 1053 /* store nonzero values */ 1054 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1055 count = 0; 1056 for (i=0; i<a->mbs; i++) { 1057 for (j=0; j<bs; j++) { 1058 for (k=a->i[i]; k<a->i[i+1]; k++) { 1059 for (l=0; l<bs; l++) { 1060 aa[count++] = a->a[bs2*k + l*bs + j]; 1061 } 1062 } 1063 } 1064 } 1065 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1066 ierr = PetscFree(aa);CHKERRQ(ierr); 1067 1068 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1069 if (file) { 1070 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1071 } 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1077 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1078 { 1079 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1080 PetscErrorCode ierr; 1081 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1082 PetscViewerFormat format; 1083 1084 PetscFunctionBegin; 1085 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1086 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1087 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1088 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1089 Mat aij; 1090 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1091 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1092 ierr = MatDestroy(aij);CHKERRQ(ierr); 1093 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1094 PetscFunctionReturn(0); 1095 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1096 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1097 for (i=0; i<a->mbs; i++) { 1098 for (j=0; j<bs; j++) { 1099 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1100 for (k=a->i[i]; k<a->i[i+1]; k++) { 1101 for (l=0; l<bs; l++) { 1102 #if defined(PETSC_USE_COMPLEX) 1103 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1104 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1105 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1106 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1107 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1108 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1109 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1110 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1111 } 1112 #else 1113 if (a->a[bs2*k + l*bs + j] != 0.0) { 1114 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1115 } 1116 #endif 1117 } 1118 } 1119 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1120 } 1121 } 1122 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1123 } else { 1124 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1125 for (i=0; i<a->mbs; i++) { 1126 for (j=0; j<bs; j++) { 1127 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1128 for (k=a->i[i]; k<a->i[i+1]; k++) { 1129 for (l=0; l<bs; l++) { 1130 #if defined(PETSC_USE_COMPLEX) 1131 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1132 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1133 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1134 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1135 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1136 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1137 } else { 1138 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1139 } 1140 #else 1141 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1142 #endif 1143 } 1144 } 1145 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1146 } 1147 } 1148 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1149 } 1150 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1156 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1157 { 1158 Mat A = (Mat) Aa; 1159 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1160 PetscErrorCode ierr; 1161 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1162 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1163 MatScalar *aa; 1164 PetscViewer viewer; 1165 1166 PetscFunctionBegin; 1167 1168 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1169 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1170 1171 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1172 1173 /* loop over matrix elements drawing boxes */ 1174 color = PETSC_DRAW_BLUE; 1175 for (i=0,row=0; i<mbs; i++,row+=bs) { 1176 for (j=a->i[i]; j<a->i[i+1]; j++) { 1177 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1178 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1179 aa = a->a + j*bs2; 1180 for (k=0; k<bs; k++) { 1181 for (l=0; l<bs; l++) { 1182 if (PetscRealPart(*aa++) >= 0.) continue; 1183 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1184 } 1185 } 1186 } 1187 } 1188 color = PETSC_DRAW_CYAN; 1189 for (i=0,row=0; i<mbs; i++,row+=bs) { 1190 for (j=a->i[i]; j<a->i[i+1]; j++) { 1191 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1192 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1193 aa = a->a + j*bs2; 1194 for (k=0; k<bs; k++) { 1195 for (l=0; l<bs; l++) { 1196 if (PetscRealPart(*aa++) != 0.) continue; 1197 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1198 } 1199 } 1200 } 1201 } 1202 1203 color = PETSC_DRAW_RED; 1204 for (i=0,row=0; i<mbs; i++,row+=bs) { 1205 for (j=a->i[i]; j<a->i[i+1]; j++) { 1206 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1207 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1208 aa = a->a + j*bs2; 1209 for (k=0; k<bs; k++) { 1210 for (l=0; l<bs; l++) { 1211 if (PetscRealPart(*aa++) <= 0.) continue; 1212 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1213 } 1214 } 1215 } 1216 } 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1222 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1223 { 1224 PetscErrorCode ierr; 1225 PetscReal xl,yl,xr,yr,w,h; 1226 PetscDraw draw; 1227 PetscTruth isnull; 1228 1229 PetscFunctionBegin; 1230 1231 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1232 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1233 1234 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1235 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1236 xr += w; yr += h; xl = -w; yl = -h; 1237 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1238 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1239 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatView_SeqBAIJ" 1245 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1246 { 1247 PetscErrorCode ierr; 1248 PetscTruth iascii,isbinary,isdraw; 1249 1250 PetscFunctionBegin; 1251 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1252 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1253 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1254 if (iascii){ 1255 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1256 } else if (isbinary) { 1257 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1258 } else if (isdraw) { 1259 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1260 } else { 1261 Mat B; 1262 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1263 ierr = MatView(B,viewer);CHKERRQ(ierr); 1264 ierr = MatDestroy(B);CHKERRQ(ierr); 1265 } 1266 PetscFunctionReturn(0); 1267 } 1268 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1272 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1273 { 1274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1275 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1276 PetscInt *ai = a->i,*ailen = a->ilen; 1277 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1278 MatScalar *ap,*aa = a->a,zero = 0.0; 1279 1280 PetscFunctionBegin; 1281 for (k=0; k<m; k++) { /* loop over rows */ 1282 row = im[k]; brow = row/bs; 1283 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1284 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1285 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1286 nrow = ailen[brow]; 1287 for (l=0; l<n; l++) { /* loop over columns */ 1288 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1289 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1290 col = in[l] ; 1291 bcol = col/bs; 1292 cidx = col%bs; 1293 ridx = row%bs; 1294 high = nrow; 1295 low = 0; /* assume unsorted */ 1296 while (high-low > 5) { 1297 t = (low+high)/2; 1298 if (rp[t] > bcol) high = t; 1299 else low = t; 1300 } 1301 for (i=low; i<high; i++) { 1302 if (rp[i] > bcol) break; 1303 if (rp[i] == bcol) { 1304 *v++ = ap[bs2*i+bs*cidx+ridx]; 1305 goto finished; 1306 } 1307 } 1308 *v++ = zero; 1309 finished:; 1310 } 1311 } 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #if defined(PETSC_USE_MAT_SINGLE) 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1318 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1319 { 1320 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1321 PetscErrorCode ierr; 1322 PetscInt i,N = m*n*b->bs2; 1323 MatScalar *vsingle; 1324 1325 PetscFunctionBegin; 1326 if (N > b->setvalueslen) { 1327 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1328 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1329 b->setvalueslen = N; 1330 } 1331 vsingle = b->setvaluescopy; 1332 for (i=0; i<N; i++) { 1333 vsingle[i] = v[i]; 1334 } 1335 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 #endif 1339 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1343 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1344 { 1345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1346 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1347 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1348 PetscErrorCode ierr; 1349 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1350 PetscTruth roworiented=a->roworiented; 1351 const MatScalar *value = v; 1352 MatScalar *ap,*aa = a->a,*bap; 1353 1354 PetscFunctionBegin; 1355 if (roworiented) { 1356 stepval = (n-1)*bs; 1357 } else { 1358 stepval = (m-1)*bs; 1359 } 1360 for (k=0; k<m; k++) { /* loop over added rows */ 1361 row = im[k]; 1362 if (row < 0) continue; 1363 #if defined(PETSC_USE_DEBUG) 1364 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1365 #endif 1366 rp = aj + ai[row]; 1367 ap = aa + bs2*ai[row]; 1368 rmax = imax[row]; 1369 nrow = ailen[row]; 1370 low = 0; 1371 high = nrow; 1372 for (l=0; l<n; l++) { /* loop over added columns */ 1373 if (in[l] < 0) continue; 1374 #if defined(PETSC_USE_DEBUG) 1375 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1376 #endif 1377 col = in[l]; 1378 if (roworiented) { 1379 value = v + k*(stepval+bs)*bs + l*bs; 1380 } else { 1381 value = v + l*(stepval+bs)*bs + k*bs; 1382 } 1383 if (col < lastcol) low = 0; else high = nrow; 1384 lastcol = col; 1385 while (high-low > 7) { 1386 t = (low+high)/2; 1387 if (rp[t] > col) high = t; 1388 else low = t; 1389 } 1390 for (i=low; i<high; i++) { 1391 if (rp[i] > col) break; 1392 if (rp[i] == col) { 1393 bap = ap + bs2*i; 1394 if (roworiented) { 1395 if (is == ADD_VALUES) { 1396 for (ii=0; ii<bs; ii++,value+=stepval) { 1397 for (jj=ii; jj<bs2; jj+=bs) { 1398 bap[jj] += *value++; 1399 } 1400 } 1401 } else { 1402 for (ii=0; ii<bs; ii++,value+=stepval) { 1403 for (jj=ii; jj<bs2; jj+=bs) { 1404 bap[jj] = *value++; 1405 } 1406 } 1407 } 1408 } else { 1409 if (is == ADD_VALUES) { 1410 for (ii=0; ii<bs; ii++,value+=stepval) { 1411 for (jj=0; jj<bs; jj++) { 1412 *bap++ += *value++; 1413 } 1414 } 1415 } else { 1416 for (ii=0; ii<bs; ii++,value+=stepval) { 1417 for (jj=0; jj<bs; jj++) { 1418 *bap++ = *value++; 1419 } 1420 } 1421 } 1422 } 1423 goto noinsert2; 1424 } 1425 } 1426 if (nonew == 1) goto noinsert2; 1427 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1428 if (nrow >= rmax) { 1429 /* there is no extra room in row, therefore enlarge */ 1430 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1431 MatScalar *new_a; 1432 1433 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1434 1435 /* malloc new storage space */ 1436 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1437 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1438 new_j = (PetscInt*)(new_a + bs2*new_nz); 1439 new_i = new_j + new_nz; 1440 1441 /* copy over old data into new slots */ 1442 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1443 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1444 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1445 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1446 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1447 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1448 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1449 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1450 /* free up old matrix storage */ 1451 ierr = PetscFree(a->a);CHKERRQ(ierr); 1452 if (!a->singlemalloc) { 1453 ierr = PetscFree(a->i);CHKERRQ(ierr); 1454 ierr = PetscFree(a->j);CHKERRQ(ierr); 1455 } 1456 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1457 a->singlemalloc = PETSC_TRUE; 1458 1459 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1460 rmax = imax[row] = imax[row] + CHUNKSIZE; 1461 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1462 a->maxnz += bs2*CHUNKSIZE; 1463 a->reallocs++; 1464 a->nz++; 1465 } 1466 N = nrow++ - 1; 1467 /* shift up all the later entries in this row */ 1468 for (ii=N; ii>=i; ii--) { 1469 rp[ii+1] = rp[ii]; 1470 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1471 } 1472 if (N >= i) { 1473 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1474 } 1475 rp[i] = col; 1476 bap = ap + bs2*i; 1477 if (roworiented) { 1478 for (ii=0; ii<bs; ii++,value+=stepval) { 1479 for (jj=ii; jj<bs2; jj+=bs) { 1480 bap[jj] = *value++; 1481 } 1482 } 1483 } else { 1484 for (ii=0; ii<bs; ii++,value+=stepval) { 1485 for (jj=0; jj<bs; jj++) { 1486 *bap++ = *value++; 1487 } 1488 } 1489 } 1490 noinsert2:; 1491 low = i; 1492 } 1493 ailen[row] = nrow; 1494 } 1495 PetscFunctionReturn(0); 1496 } 1497 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1500 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1501 { 1502 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1503 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1504 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1505 PetscErrorCode ierr; 1506 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1507 MatScalar *aa = a->a,*ap; 1508 PetscReal ratio=0.6; 1509 1510 PetscFunctionBegin; 1511 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1512 1513 if (m) rmax = ailen[0]; 1514 for (i=1; i<mbs; i++) { 1515 /* move each row back by the amount of empty slots (fshift) before it*/ 1516 fshift += imax[i-1] - ailen[i-1]; 1517 rmax = PetscMax(rmax,ailen[i]); 1518 if (fshift) { 1519 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1520 N = ailen[i]; 1521 for (j=0; j<N; j++) { 1522 ip[j-fshift] = ip[j]; 1523 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1524 } 1525 } 1526 ai[i] = ai[i-1] + ailen[i-1]; 1527 } 1528 if (mbs) { 1529 fshift += imax[mbs-1] - ailen[mbs-1]; 1530 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1531 } 1532 /* reset ilen and imax for each row */ 1533 for (i=0; i<mbs; i++) { 1534 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1535 } 1536 a->nz = ai[mbs]; 1537 1538 /* diagonals may have moved, so kill the diagonal pointers */ 1539 a->idiagvalid = PETSC_FALSE; 1540 if (fshift && a->diag) { 1541 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1542 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1543 a->diag = 0; 1544 } 1545 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2); 1546 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs); 1547 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 1548 a->reallocs = 0; 1549 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1550 1551 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1552 if (a->compressedrow.use){ 1553 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1554 } 1555 1556 A->same_nonzero = PETSC_TRUE; 1557 PetscFunctionReturn(0); 1558 } 1559 1560 /* 1561 This function returns an array of flags which indicate the locations of contiguous 1562 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1563 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1564 Assume: sizes should be long enough to hold all the values. 1565 */ 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1568 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1569 { 1570 PetscInt i,j,k,row; 1571 PetscTruth flg; 1572 1573 PetscFunctionBegin; 1574 for (i=0,j=0; i<n; j++) { 1575 row = idx[i]; 1576 if (row%bs!=0) { /* Not the begining of a block */ 1577 sizes[j] = 1; 1578 i++; 1579 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1580 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1581 i++; 1582 } else { /* Begining of the block, so check if the complete block exists */ 1583 flg = PETSC_TRUE; 1584 for (k=1; k<bs; k++) { 1585 if (row+k != idx[i+k]) { /* break in the block */ 1586 flg = PETSC_FALSE; 1587 break; 1588 } 1589 } 1590 if (flg) { /* No break in the bs */ 1591 sizes[j] = bs; 1592 i+= bs; 1593 } else { 1594 sizes[j] = 1; 1595 i++; 1596 } 1597 } 1598 } 1599 *bs_max = j; 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1605 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1606 { 1607 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1608 PetscErrorCode ierr; 1609 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1610 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1611 PetscScalar zero = 0.0; 1612 MatScalar *aa; 1613 1614 PetscFunctionBegin; 1615 /* Make a copy of the IS and sort it */ 1616 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1617 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1618 1619 /* allocate memory for rows,sizes */ 1620 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1621 sizes = rows + is_n; 1622 1623 /* copy IS values to rows, and sort them */ 1624 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1625 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1626 if (baij->keepzeroedrows) { 1627 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1628 bs_max = is_n; 1629 A->same_nonzero = PETSC_TRUE; 1630 } else { 1631 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1632 A->same_nonzero = PETSC_FALSE; 1633 } 1634 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1635 1636 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1637 row = rows[j]; 1638 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1639 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1640 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1641 if (sizes[i] == bs && !baij->keepzeroedrows) { 1642 if (diag) { 1643 if (baij->ilen[row/bs] > 0) { 1644 baij->ilen[row/bs] = 1; 1645 baij->j[baij->i[row/bs]] = row/bs; 1646 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1647 } 1648 /* Now insert all the diagonal values for this bs */ 1649 for (k=0; k<bs; k++) { 1650 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1651 } 1652 } else { /* (!diag) */ 1653 baij->ilen[row/bs] = 0; 1654 } /* end (!diag) */ 1655 } else { /* (sizes[i] != bs) */ 1656 #if defined (PETSC_USE_DEBUG) 1657 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1658 #endif 1659 for (k=0; k<count; k++) { 1660 aa[0] = zero; 1661 aa += bs; 1662 } 1663 if (diag) { 1664 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1665 } 1666 } 1667 } 1668 1669 ierr = PetscFree(rows);CHKERRQ(ierr); 1670 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1676 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1677 { 1678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1679 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1680 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1681 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1682 PetscErrorCode ierr; 1683 PetscInt ridx,cidx,bs2=a->bs2; 1684 PetscTruth roworiented=a->roworiented; 1685 MatScalar *ap,value,*aa=a->a,*bap; 1686 1687 PetscFunctionBegin; 1688 for (k=0; k<m; k++) { /* loop over added rows */ 1689 row = im[k]; brow = row/bs; 1690 if (row < 0) continue; 1691 #if defined(PETSC_USE_DEBUG) 1692 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1693 #endif 1694 rp = aj + ai[brow]; 1695 ap = aa + bs2*ai[brow]; 1696 rmax = imax[brow]; 1697 nrow = ailen[brow]; 1698 low = 0; 1699 high = nrow; 1700 for (l=0; l<n; l++) { /* loop over added columns */ 1701 if (in[l] < 0) continue; 1702 #if defined(PETSC_USE_DEBUG) 1703 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1704 #endif 1705 col = in[l]; bcol = col/bs; 1706 ridx = row % bs; cidx = col % bs; 1707 if (roworiented) { 1708 value = v[l + k*n]; 1709 } else { 1710 value = v[k + l*m]; 1711 } 1712 if (col < lastcol) low = 0; else high = nrow; 1713 lastcol = col; 1714 while (high-low > 7) { 1715 t = (low+high)/2; 1716 if (rp[t] > bcol) high = t; 1717 else low = t; 1718 } 1719 for (i=low; i<high; i++) { 1720 if (rp[i] > bcol) break; 1721 if (rp[i] == bcol) { 1722 bap = ap + bs2*i + bs*cidx + ridx; 1723 if (is == ADD_VALUES) *bap += value; 1724 else *bap = value; 1725 goto noinsert1; 1726 } 1727 } 1728 if (nonew == 1) goto noinsert1; 1729 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1730 if (nrow >= rmax) { 1731 /* there is no extra room in row, therefore enlarge */ 1732 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1733 MatScalar *new_a; 1734 1735 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1736 1737 /* Malloc new storage space */ 1738 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1739 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1740 new_j = (PetscInt*)(new_a + bs2*new_nz); 1741 new_i = new_j + new_nz; 1742 1743 /* copy over old data into new slots */ 1744 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1745 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1746 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1747 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1748 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1749 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1750 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1751 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1752 /* free up old matrix storage */ 1753 ierr = PetscFree(a->a);CHKERRQ(ierr); 1754 if (!a->singlemalloc) { 1755 ierr = PetscFree(a->i);CHKERRQ(ierr); 1756 ierr = PetscFree(a->j);CHKERRQ(ierr); 1757 } 1758 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1759 a->singlemalloc = PETSC_TRUE; 1760 1761 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1762 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1763 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1764 a->maxnz += bs2*CHUNKSIZE; 1765 a->reallocs++; 1766 a->nz++; 1767 } 1768 N = nrow++ - 1; 1769 /* shift up all the later entries in this row */ 1770 for (ii=N; ii>=i; ii--) { 1771 rp[ii+1] = rp[ii]; 1772 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1773 } 1774 if (N>=i) { 1775 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1776 } 1777 rp[i] = bcol; 1778 ap[bs2*i + bs*cidx + ridx] = value; 1779 noinsert1:; 1780 low = i; 1781 } 1782 ailen[brow] = nrow; 1783 } 1784 A->same_nonzero = PETSC_FALSE; 1785 PetscFunctionReturn(0); 1786 } 1787 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1791 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1792 { 1793 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1794 Mat outA; 1795 PetscErrorCode ierr; 1796 PetscTruth row_identity,col_identity; 1797 1798 PetscFunctionBegin; 1799 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1800 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1801 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1802 if (!row_identity || !col_identity) { 1803 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1804 } 1805 1806 outA = inA; 1807 inA->factor = FACTOR_LU; 1808 1809 if (!a->diag) { 1810 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1811 } 1812 1813 a->row = row; 1814 a->col = col; 1815 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1816 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1817 1818 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1819 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1820 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1821 1822 /* 1823 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1824 for ILU(0) factorization with natural ordering 1825 */ 1826 if (inA->bs < 8) { 1827 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1828 } else { 1829 if (!a->solve_work) { 1830 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1831 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1832 } 1833 } 1834 1835 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1836 1837 PetscFunctionReturn(0); 1838 } 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1841 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1842 { 1843 static PetscTruth called = PETSC_FALSE; 1844 MPI_Comm comm = A->comm; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1849 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1850 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 EXTERN_C_BEGIN 1855 #undef __FUNCT__ 1856 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1857 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1858 { 1859 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1860 PetscInt i,nz,nbs; 1861 1862 PetscFunctionBegin; 1863 nz = baij->maxnz/baij->bs2; 1864 nbs = baij->nbs; 1865 for (i=0; i<nz; i++) { 1866 baij->j[i] = indices[i]; 1867 } 1868 baij->nz = nz; 1869 for (i=0; i<nbs; i++) { 1870 baij->ilen[i] = baij->imax[i]; 1871 } 1872 1873 PetscFunctionReturn(0); 1874 } 1875 EXTERN_C_END 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1879 /*@ 1880 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1881 in the matrix. 1882 1883 Input Parameters: 1884 + mat - the SeqBAIJ matrix 1885 - indices - the column indices 1886 1887 Level: advanced 1888 1889 Notes: 1890 This can be called if you have precomputed the nonzero structure of the 1891 matrix and want to provide it to the matrix object to improve the performance 1892 of the MatSetValues() operation. 1893 1894 You MUST have set the correct numbers of nonzeros per row in the call to 1895 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1896 1897 MUST be called before any calls to MatSetValues(); 1898 1899 @*/ 1900 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1901 { 1902 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1903 1904 PetscFunctionBegin; 1905 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1906 PetscValidPointer(indices,2); 1907 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1908 if (f) { 1909 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1910 } else { 1911 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1918 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1919 { 1920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1921 PetscErrorCode ierr; 1922 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1923 PetscReal atmp; 1924 PetscScalar *x,zero = 0.0; 1925 MatScalar *aa; 1926 PetscInt ncols,brow,krow,kcol; 1927 1928 PetscFunctionBegin; 1929 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1930 bs = A->bs; 1931 aa = a->a; 1932 ai = a->i; 1933 aj = a->j; 1934 mbs = a->mbs; 1935 1936 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1937 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1938 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1939 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1940 for (i=0; i<mbs; i++) { 1941 ncols = ai[1] - ai[0]; ai++; 1942 brow = bs*i; 1943 for (j=0; j<ncols; j++){ 1944 /* bcol = bs*(*aj); */ 1945 for (kcol=0; kcol<bs; kcol++){ 1946 for (krow=0; krow<bs; krow++){ 1947 atmp = PetscAbsScalar(*aa); aa++; 1948 row = brow + krow; /* row index */ 1949 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1950 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1951 } 1952 } 1953 aj++; 1954 } 1955 } 1956 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1962 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1963 { 1964 PetscErrorCode ierr; 1965 1966 PetscFunctionBegin; 1967 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1968 PetscFunctionReturn(0); 1969 } 1970 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1973 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1974 { 1975 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1976 PetscFunctionBegin; 1977 *array = a->a; 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1983 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1984 { 1985 PetscFunctionBegin; 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #include "petscblaslapack.h" 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1992 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1993 { 1994 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1995 PetscErrorCode ierr; 1996 PetscInt i,bs=Y->bs,j,bs2; 1997 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1998 1999 PetscFunctionBegin; 2000 if (str == SAME_NONZERO_PATTERN) { 2001 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2002 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2003 if (y->xtoy && y->XtoY != X) { 2004 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2005 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2006 } 2007 if (!y->xtoy) { /* get xtoy */ 2008 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2009 y->XtoY = X; 2010 } 2011 bs2 = bs*bs; 2012 for (i=0; i<x->nz; i++) { 2013 j = 0; 2014 while (j < bs2){ 2015 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2016 j++; 2017 } 2018 } 2019 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 2020 } else { 2021 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2022 } 2023 PetscFunctionReturn(0); 2024 } 2025 2026 /* -------------------------------------------------------------------*/ 2027 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2028 MatGetRow_SeqBAIJ, 2029 MatRestoreRow_SeqBAIJ, 2030 MatMult_SeqBAIJ_N, 2031 /* 4*/ MatMultAdd_SeqBAIJ_N, 2032 MatMultTranspose_SeqBAIJ, 2033 MatMultTransposeAdd_SeqBAIJ, 2034 MatSolve_SeqBAIJ_N, 2035 0, 2036 0, 2037 /*10*/ 0, 2038 MatLUFactor_SeqBAIJ, 2039 0, 2040 0, 2041 MatTranspose_SeqBAIJ, 2042 /*15*/ MatGetInfo_SeqBAIJ, 2043 MatEqual_SeqBAIJ, 2044 MatGetDiagonal_SeqBAIJ, 2045 MatDiagonalScale_SeqBAIJ, 2046 MatNorm_SeqBAIJ, 2047 /*20*/ 0, 2048 MatAssemblyEnd_SeqBAIJ, 2049 0, 2050 MatSetOption_SeqBAIJ, 2051 MatZeroEntries_SeqBAIJ, 2052 /*25*/ MatZeroRows_SeqBAIJ, 2053 MatLUFactorSymbolic_SeqBAIJ, 2054 MatLUFactorNumeric_SeqBAIJ_N, 2055 MatCholeskyFactorSymbolic_SeqBAIJ, 2056 MatCholeskyFactorNumeric_SeqBAIJ_N, 2057 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2058 MatILUFactorSymbolic_SeqBAIJ, 2059 MatICCFactorSymbolic_SeqBAIJ, 2060 MatGetArray_SeqBAIJ, 2061 MatRestoreArray_SeqBAIJ, 2062 /*35*/ MatDuplicate_SeqBAIJ, 2063 0, 2064 0, 2065 MatILUFactor_SeqBAIJ, 2066 0, 2067 /*40*/ MatAXPY_SeqBAIJ, 2068 MatGetSubMatrices_SeqBAIJ, 2069 MatIncreaseOverlap_SeqBAIJ, 2070 MatGetValues_SeqBAIJ, 2071 0, 2072 /*45*/ MatPrintHelp_SeqBAIJ, 2073 MatScale_SeqBAIJ, 2074 0, 2075 0, 2076 0, 2077 /*50*/ 0, 2078 MatGetRowIJ_SeqBAIJ, 2079 MatRestoreRowIJ_SeqBAIJ, 2080 0, 2081 0, 2082 /*55*/ 0, 2083 0, 2084 0, 2085 0, 2086 MatSetValuesBlocked_SeqBAIJ, 2087 /*60*/ MatGetSubMatrix_SeqBAIJ, 2088 MatDestroy_SeqBAIJ, 2089 MatView_SeqBAIJ, 2090 MatGetPetscMaps_Petsc, 2091 0, 2092 /*65*/ 0, 2093 0, 2094 0, 2095 0, 2096 0, 2097 /*70*/ MatGetRowMax_SeqBAIJ, 2098 MatConvert_Basic, 2099 0, 2100 0, 2101 0, 2102 /*75*/ 0, 2103 0, 2104 0, 2105 0, 2106 0, 2107 /*80*/ 0, 2108 0, 2109 0, 2110 0, 2111 MatLoad_SeqBAIJ, 2112 /*85*/ 0, 2113 0, 2114 0, 2115 0, 2116 0, 2117 /*90*/ 0, 2118 0, 2119 0, 2120 0, 2121 0, 2122 /*95*/ 0, 2123 0, 2124 0, 2125 0}; 2126 2127 EXTERN_C_BEGIN 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2130 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2131 { 2132 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2133 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2134 PetscErrorCode ierr; 2135 2136 PetscFunctionBegin; 2137 if (aij->nonew != 1) { 2138 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2139 } 2140 2141 /* allocate space for values if not already there */ 2142 if (!aij->saved_values) { 2143 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2144 } 2145 2146 /* copy values over */ 2147 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 EXTERN_C_END 2151 2152 EXTERN_C_BEGIN 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2155 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2156 { 2157 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2158 PetscErrorCode ierr; 2159 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2160 2161 PetscFunctionBegin; 2162 if (aij->nonew != 1) { 2163 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2164 } 2165 if (!aij->saved_values) { 2166 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2167 } 2168 2169 /* copy values over */ 2170 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2171 PetscFunctionReturn(0); 2172 } 2173 EXTERN_C_END 2174 2175 EXTERN_C_BEGIN 2176 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2177 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2178 EXTERN_C_END 2179 2180 EXTERN_C_BEGIN 2181 #undef __FUNCT__ 2182 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2183 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2184 { 2185 Mat_SeqBAIJ *b; 2186 PetscErrorCode ierr; 2187 PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2188 PetscTruth flg; 2189 2190 PetscFunctionBegin; 2191 2192 B->preallocated = PETSC_TRUE; 2193 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2194 if (nnz && newbs != bs) { 2195 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2196 } 2197 bs = newbs; 2198 2199 mbs = B->m/bs; 2200 nbs = B->n/bs; 2201 bs2 = bs*bs; 2202 2203 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2204 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2205 } 2206 2207 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2208 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2209 if (nnz) { 2210 for (i=0; i<mbs; i++) { 2211 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2212 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2213 } 2214 } 2215 2216 b = (Mat_SeqBAIJ*)B->data; 2217 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2218 B->ops->solve = MatSolve_SeqBAIJ_Update; 2219 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2220 if (!flg) { 2221 switch (bs) { 2222 case 1: 2223 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2224 B->ops->mult = MatMult_SeqBAIJ_1; 2225 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2226 break; 2227 case 2: 2228 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2229 B->ops->mult = MatMult_SeqBAIJ_2; 2230 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2231 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2232 break; 2233 case 3: 2234 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2235 B->ops->mult = MatMult_SeqBAIJ_3; 2236 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2237 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2238 break; 2239 case 4: 2240 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2241 B->ops->mult = MatMult_SeqBAIJ_4; 2242 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2243 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2244 break; 2245 case 5: 2246 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2247 B->ops->mult = MatMult_SeqBAIJ_5; 2248 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2249 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2250 break; 2251 case 6: 2252 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2253 B->ops->mult = MatMult_SeqBAIJ_6; 2254 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2255 break; 2256 case 7: 2257 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2258 B->ops->mult = MatMult_SeqBAIJ_7; 2259 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2260 break; 2261 default: 2262 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2263 B->ops->mult = MatMult_SeqBAIJ_N; 2264 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2265 break; 2266 } 2267 } 2268 B->bs = bs; 2269 b->mbs = mbs; 2270 b->nbs = nbs; 2271 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2272 if (!nnz) { 2273 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2274 else if (nz <= 0) nz = 1; 2275 for (i=0; i<mbs; i++) b->imax[i] = nz; 2276 nz = nz*mbs; 2277 } else { 2278 nz = 0; 2279 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2280 } 2281 2282 /* allocate the matrix space */ 2283 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2284 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2285 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2286 b->j = (PetscInt*)(b->a + nz*bs2); 2287 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2288 b->i = b->j + nz; 2289 b->singlemalloc = PETSC_TRUE; 2290 2291 b->i[0] = 0; 2292 for (i=1; i<mbs+1; i++) { 2293 b->i[i] = b->i[i-1] + b->imax[i-1]; 2294 } 2295 2296 /* b->ilen will count nonzeros in each block row so far. */ 2297 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2298 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2299 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2300 2301 B->bs = bs; 2302 b->bs2 = bs2; 2303 b->mbs = mbs; 2304 b->nz = 0; 2305 b->maxnz = nz*bs2; 2306 B->info.nz_unneeded = (PetscReal)b->maxnz; 2307 PetscFunctionReturn(0); 2308 } 2309 EXTERN_C_END 2310 2311 /*MC 2312 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2313 block sparse compressed row format. 2314 2315 Options Database Keys: 2316 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2317 2318 Level: beginner 2319 2320 .seealso: MatCreateSeqBAIJ 2321 M*/ 2322 2323 EXTERN_C_BEGIN 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatCreate_SeqBAIJ" 2326 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2327 { 2328 PetscErrorCode ierr; 2329 PetscMPIInt size; 2330 Mat_SeqBAIJ *b; 2331 2332 PetscFunctionBegin; 2333 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2334 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2335 2336 B->m = B->M = PetscMax(B->m,B->M); 2337 B->n = B->N = PetscMax(B->n,B->N); 2338 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2339 B->data = (void*)b; 2340 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2341 B->factor = 0; 2342 B->lupivotthreshold = 1.0; 2343 B->mapping = 0; 2344 b->row = 0; 2345 b->col = 0; 2346 b->icol = 0; 2347 b->reallocs = 0; 2348 b->saved_values = 0; 2349 #if defined(PETSC_USE_MAT_SINGLE) 2350 b->setvalueslen = 0; 2351 b->setvaluescopy = PETSC_NULL; 2352 #endif 2353 2354 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2355 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2356 2357 b->sorted = PETSC_FALSE; 2358 b->roworiented = PETSC_TRUE; 2359 b->nonew = 0; 2360 b->diag = 0; 2361 b->solve_work = 0; 2362 b->mult_work = 0; 2363 B->spptr = 0; 2364 B->info.nz_unneeded = (PetscReal)b->maxnz; 2365 b->keepzeroedrows = PETSC_FALSE; 2366 b->xtoy = 0; 2367 b->XtoY = 0; 2368 b->compressedrow.use = PETSC_FALSE; 2369 b->compressedrow.nrows = 0; 2370 b->compressedrow.i = PETSC_NULL; 2371 b->compressedrow.rindex = PETSC_NULL; 2372 b->compressedrow.checked = PETSC_FALSE; 2373 B->same_nonzero = PETSC_FALSE; 2374 2375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2376 "MatStoreValues_SeqBAIJ", 2377 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2379 "MatRetrieveValues_SeqBAIJ", 2380 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2381 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2382 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2383 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2384 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2385 "MatConvert_SeqBAIJ_SeqAIJ", 2386 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2387 #if !defined(PETSC_USE_64BIT_INT) 2388 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2389 "MatConvert_SeqBAIJ_SeqSBAIJ", 2390 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2391 #endif 2392 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2393 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2394 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2395 PetscFunctionReturn(0); 2396 } 2397 EXTERN_C_END 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2401 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2402 { 2403 Mat C; 2404 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2405 PetscErrorCode ierr; 2406 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2407 2408 PetscFunctionBegin; 2409 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2410 2411 *B = 0; 2412 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2413 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2414 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2415 c = (Mat_SeqBAIJ*)C->data; 2416 2417 C->M = A->M; 2418 C->N = A->N; 2419 C->bs = A->bs; 2420 c->bs2 = a->bs2; 2421 c->mbs = a->mbs; 2422 c->nbs = a->nbs; 2423 2424 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2425 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2426 for (i=0; i<mbs; i++) { 2427 c->imax[i] = a->imax[i]; 2428 c->ilen[i] = a->ilen[i]; 2429 } 2430 2431 /* allocate the matrix space */ 2432 c->singlemalloc = PETSC_TRUE; 2433 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2434 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2435 c->j = (PetscInt*)(c->a + nz*bs2); 2436 c->i = c->j + nz; 2437 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2438 if (mbs > 0) { 2439 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2440 if (cpvalues == MAT_COPY_VALUES) { 2441 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2442 } else { 2443 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2444 } 2445 } 2446 2447 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2448 c->sorted = a->sorted; 2449 c->roworiented = a->roworiented; 2450 c->nonew = a->nonew; 2451 2452 if (a->diag) { 2453 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2454 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2455 for (i=0; i<mbs; i++) { 2456 c->diag[i] = a->diag[i]; 2457 } 2458 } else c->diag = 0; 2459 c->nz = a->nz; 2460 c->maxnz = a->maxnz; 2461 c->solve_work = 0; 2462 c->mult_work = 0; 2463 C->preallocated = PETSC_TRUE; 2464 C->assembled = PETSC_TRUE; 2465 2466 c->compressedrow.use = a->compressedrow.use; 2467 c->compressedrow.nrows = a->compressedrow.nrows; 2468 c->compressedrow.checked = a->compressedrow.checked; 2469 if ( a->compressedrow.checked && a->compressedrow.use){ 2470 i = a->compressedrow.nrows; 2471 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2472 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2473 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2474 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2475 } else { 2476 c->compressedrow.use = PETSC_FALSE; 2477 c->compressedrow.i = PETSC_NULL; 2478 c->compressedrow.rindex = PETSC_NULL; 2479 } 2480 C->same_nonzero = A->same_nonzero; 2481 *B = C; 2482 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatLoad_SeqBAIJ" 2488 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2489 { 2490 Mat_SeqBAIJ *a; 2491 Mat B; 2492 PetscErrorCode ierr; 2493 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2494 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2495 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2496 PetscInt *masked,nmask,tmp,bs2,ishift; 2497 PetscMPIInt size; 2498 int fd; 2499 PetscScalar *aa; 2500 MPI_Comm comm = ((PetscObject)viewer)->comm; 2501 2502 PetscFunctionBegin; 2503 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2504 bs2 = bs*bs; 2505 2506 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2507 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2508 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2509 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2510 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2511 M = header[1]; N = header[2]; nz = header[3]; 2512 2513 if (header[3] < 0) { 2514 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2515 } 2516 2517 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2518 2519 /* 2520 This code adds extra rows to make sure the number of rows is 2521 divisible by the blocksize 2522 */ 2523 mbs = M/bs; 2524 extra_rows = bs - M + bs*(mbs); 2525 if (extra_rows == bs) extra_rows = 0; 2526 else mbs++; 2527 if (extra_rows) { 2528 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2529 } 2530 2531 /* read in row lengths */ 2532 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2533 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2534 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2535 2536 /* read in column indices */ 2537 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2538 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2539 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2540 2541 /* loop over row lengths determining block row lengths */ 2542 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2543 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2544 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2545 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2546 masked = mask + mbs; 2547 rowcount = 0; nzcount = 0; 2548 for (i=0; i<mbs; i++) { 2549 nmask = 0; 2550 for (j=0; j<bs; j++) { 2551 kmax = rowlengths[rowcount]; 2552 for (k=0; k<kmax; k++) { 2553 tmp = jj[nzcount++]/bs; 2554 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2555 } 2556 rowcount++; 2557 } 2558 browlengths[i] += nmask; 2559 /* zero out the mask elements we set */ 2560 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2561 } 2562 2563 /* create our matrix */ 2564 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2565 ierr = MatSetType(B,type);CHKERRQ(ierr); 2566 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2567 a = (Mat_SeqBAIJ*)B->data; 2568 2569 /* set matrix "i" values */ 2570 a->i[0] = 0; 2571 for (i=1; i<= mbs; i++) { 2572 a->i[i] = a->i[i-1] + browlengths[i-1]; 2573 a->ilen[i-1] = browlengths[i-1]; 2574 } 2575 a->nz = 0; 2576 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2577 2578 /* read in nonzero values */ 2579 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2580 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2581 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2582 2583 /* set "a" and "j" values into matrix */ 2584 nzcount = 0; jcount = 0; 2585 for (i=0; i<mbs; i++) { 2586 nzcountb = nzcount; 2587 nmask = 0; 2588 for (j=0; j<bs; j++) { 2589 kmax = rowlengths[i*bs+j]; 2590 for (k=0; k<kmax; k++) { 2591 tmp = jj[nzcount++]/bs; 2592 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2593 } 2594 } 2595 /* sort the masked values */ 2596 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2597 2598 /* set "j" values into matrix */ 2599 maskcount = 1; 2600 for (j=0; j<nmask; j++) { 2601 a->j[jcount++] = masked[j]; 2602 mask[masked[j]] = maskcount++; 2603 } 2604 /* set "a" values into matrix */ 2605 ishift = bs2*a->i[i]; 2606 for (j=0; j<bs; j++) { 2607 kmax = rowlengths[i*bs+j]; 2608 for (k=0; k<kmax; k++) { 2609 tmp = jj[nzcountb]/bs ; 2610 block = mask[tmp] - 1; 2611 point = jj[nzcountb] - bs*tmp; 2612 idx = ishift + bs2*block + j + bs*point; 2613 a->a[idx] = (MatScalar)aa[nzcountb++]; 2614 } 2615 } 2616 /* zero out the mask elements we set */ 2617 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2618 } 2619 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2620 2621 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2622 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2623 ierr = PetscFree(aa);CHKERRQ(ierr); 2624 ierr = PetscFree(jj);CHKERRQ(ierr); 2625 ierr = PetscFree(mask);CHKERRQ(ierr); 2626 2627 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2628 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2629 ierr = MatView_Private(B);CHKERRQ(ierr); 2630 2631 *A = B; 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "MatCreateSeqBAIJ" 2637 /*@C 2638 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2639 compressed row) format. For good matrix assembly performance the 2640 user should preallocate the matrix storage by setting the parameter nz 2641 (or the array nnz). By setting these parameters accurately, performance 2642 during matrix assembly can be increased by more than a factor of 50. 2643 2644 Collective on MPI_Comm 2645 2646 Input Parameters: 2647 + comm - MPI communicator, set to PETSC_COMM_SELF 2648 . bs - size of block 2649 . m - number of rows 2650 . n - number of columns 2651 . nz - number of nonzero blocks per block row (same for all rows) 2652 - nnz - array containing the number of nonzero blocks in the various block rows 2653 (possibly different for each block row) or PETSC_NULL 2654 2655 Output Parameter: 2656 . A - the matrix 2657 2658 Options Database Keys: 2659 . -mat_no_unroll - uses code that does not unroll the loops in the 2660 block calculations (much slower) 2661 . -mat_block_size - size of the blocks to use 2662 2663 Level: intermediate 2664 2665 Notes: 2666 The number of rows and columns must be divisible by blocksize. 2667 2668 If the nnz parameter is given then the nz parameter is ignored 2669 2670 A nonzero block is any block that as 1 or more nonzeros in it 2671 2672 The block AIJ format is fully compatible with standard Fortran 77 2673 storage. That is, the stored row and column indices can begin at 2674 either one (as in Fortran) or zero. See the users' manual for details. 2675 2676 Specify the preallocated storage with either nz or nnz (not both). 2677 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2678 allocation. For additional details, see the users manual chapter on 2679 matrices. 2680 2681 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2682 @*/ 2683 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2684 { 2685 PetscErrorCode ierr; 2686 2687 PetscFunctionBegin; 2688 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2689 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2690 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2696 /*@C 2697 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2698 per row in the matrix. For good matrix assembly performance the 2699 user should preallocate the matrix storage by setting the parameter nz 2700 (or the array nnz). By setting these parameters accurately, performance 2701 during matrix assembly can be increased by more than a factor of 50. 2702 2703 Collective on MPI_Comm 2704 2705 Input Parameters: 2706 + A - the matrix 2707 . bs - size of block 2708 . nz - number of block nonzeros per block row (same for all rows) 2709 - nnz - array containing the number of block nonzeros in the various block rows 2710 (possibly different for each block row) or PETSC_NULL 2711 2712 Options Database Keys: 2713 . -mat_no_unroll - uses code that does not unroll the loops in the 2714 block calculations (much slower) 2715 . -mat_block_size - size of the blocks to use 2716 2717 Level: intermediate 2718 2719 Notes: 2720 If the nnz parameter is given then the nz parameter is ignored 2721 2722 The block AIJ format is fully compatible with standard Fortran 77 2723 storage. That is, the stored row and column indices can begin at 2724 either one (as in Fortran) or zero. See the users' manual for details. 2725 2726 Specify the preallocated storage with either nz or nnz (not both). 2727 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2728 allocation. For additional details, see the users manual chapter on 2729 matrices. 2730 2731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2732 @*/ 2733 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2734 { 2735 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2736 2737 PetscFunctionBegin; 2738 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2739 if (f) { 2740 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2741 } 2742 PetscFunctionReturn(0); 2743 } 2744 2745