1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 18 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 19 20 PetscFunctionBegin; 21 if (a->idiagvalid) PetscFunctionReturn(0); 22 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 23 diag_offset = a->diag; 24 if (!a->idiag) { 25 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 26 } 27 diag = a->idiag; 28 mdiag = a->idiag+bs*bs*mbs; 29 /* factor and invert each block */ 30 switch (bs){ 31 case 2: 32 for (i=0; i<mbs; i++) { 33 odiag = v + 4*diag_offset[i]; 34 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 35 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 36 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 37 diag += 4; 38 mdiag += 4; 39 } 40 break; 41 case 3: 42 for (i=0; i<mbs; i++) { 43 odiag = v + 9*diag_offset[i]; 44 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 45 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 46 diag[8] = odiag[8]; 47 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 48 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 49 mdiag[8] = odiag[8]; 50 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 51 diag += 9; 52 mdiag += 9; 53 } 54 break; 55 case 4: 56 for (i=0; i<mbs; i++) { 57 odiag = v + 16*diag_offset[i]; 58 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 59 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 61 diag += 16; 62 mdiag += 16; 63 } 64 break; 65 case 5: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 25*diag_offset[i]; 68 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 71 diag += 25; 72 mdiag += 25; 73 } 74 break; 75 default: 76 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 77 } 78 a->idiagvalid = PETSC_TRUE; 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 85 { 86 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 87 PetscScalar *x,x1,x2,s1,s2; 88 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 89 PetscErrorCode ierr; 90 PetscInt m = a->mbs,i,i2,nz,idx; 91 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 175 } 176 } else { 177 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 PetscErrorCode ierr; 192 PetscInt m = a->mbs,i,i2,nz,idx; 193 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 194 195 PetscFunctionBegin; 196 its = its*lits; 197 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 198 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 199 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 200 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 201 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 202 203 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 204 205 diag = a->diag; 206 idiag = a->idiag; 207 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209 210 if (flag & SOR_ZERO_INITIAL_GUESS) { 211 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 212 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 213 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 214 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 215 i2 = 3; 216 idiag += 9; 217 for (i=1; i<m; i++) { 218 v = aa + 9*ai[i]; 219 vi = aj + ai[i]; 220 nz = diag[i] - ai[i]; 221 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 222 while (nz--) { 223 idx = 3*(*vi++); 224 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 225 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 226 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 227 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 228 v += 9; 229 } 230 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 231 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 232 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 233 idiag += 9; 234 i2 += 3; 235 } 236 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 237 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 238 } 239 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 240 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 241 i2 = 0; 242 mdiag = a->idiag+9*a->mbs; 243 for (i=0; i<m; i++) { 244 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 245 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 246 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 247 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 248 mdiag += 9; 249 i2 += 3; 250 } 251 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 252 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 253 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 256 idiag = a->idiag+9*a->mbs - 9; 257 i2 = 3*m - 3; 258 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 259 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 260 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 261 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 262 idiag -= 9; 263 i2 -= 3; 264 for (i=m-2; i>=0; i--) { 265 v = aa + 9*(diag[i]+1); 266 vi = aj + diag[i] + 1; 267 nz = ai[i+1] - diag[i] - 1; 268 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 269 while (nz--) { 270 idx = 3*(*vi++); 271 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 272 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 273 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 274 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 275 v += 9; 276 } 277 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 278 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 279 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 280 idiag -= 9; 281 i2 -= 3; 282 } 283 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 284 } 285 } else { 286 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 287 } 288 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 289 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 296 { 297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 298 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 299 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 300 PetscErrorCode ierr; 301 PetscInt m = a->mbs,i,i2,nz,idx; 302 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 303 304 PetscFunctionBegin; 305 its = its*lits; 306 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 307 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 308 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 309 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 310 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 311 312 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 313 314 diag = a->diag; 315 idiag = a->idiag; 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 318 319 if (flag & SOR_ZERO_INITIAL_GUESS) { 320 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 321 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 322 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 323 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 324 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 325 i2 = 4; 326 idiag += 16; 327 for (i=1; i<m; i++) { 328 v = aa + 16*ai[i]; 329 vi = aj + ai[i]; 330 nz = diag[i] - ai[i]; 331 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 332 while (nz--) { 333 idx = 4*(*vi++); 334 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 335 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 336 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 337 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 338 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 339 v += 16; 340 } 341 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 342 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 343 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 344 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 345 idiag += 16; 346 i2 += 4; 347 } 348 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 349 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 350 } 351 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 352 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 353 i2 = 0; 354 mdiag = a->idiag+16*a->mbs; 355 for (i=0; i<m; i++) { 356 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 357 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 358 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 359 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 360 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 361 mdiag += 16; 362 i2 += 4; 363 } 364 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 365 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 366 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 367 } 368 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 369 idiag = a->idiag+16*a->mbs - 16; 370 i2 = 4*m - 4; 371 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 372 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 373 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 374 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 375 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 376 idiag -= 16; 377 i2 -= 4; 378 for (i=m-2; i>=0; i--) { 379 v = aa + 16*(diag[i]+1); 380 vi = aj + diag[i] + 1; 381 nz = ai[i+1] - diag[i] - 1; 382 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 383 while (nz--) { 384 idx = 4*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 386 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 387 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 388 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 389 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 390 v += 16; 391 } 392 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 393 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 394 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 395 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 396 idiag -= 16; 397 i2 -= 4; 398 } 399 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 400 } 401 } else { 402 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 403 } 404 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 412 { 413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 414 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 415 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 416 PetscErrorCode ierr; 417 PetscInt m = a->mbs,i,i2,nz,idx; 418 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 419 420 PetscFunctionBegin; 421 its = its*lits; 422 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 423 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 424 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 425 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 426 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 427 428 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 429 430 diag = a->diag; 431 idiag = a->idiag; 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 434 435 if (flag & SOR_ZERO_INITIAL_GUESS) { 436 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 437 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 438 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 439 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 440 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 441 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 442 i2 = 5; 443 idiag += 25; 444 for (i=1; i<m; i++) { 445 v = aa + 25*ai[i]; 446 vi = aj + ai[i]; 447 nz = diag[i] - ai[i]; 448 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 449 while (nz--) { 450 idx = 5*(*vi++); 451 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 452 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 453 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 454 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 455 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 456 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 457 v += 25; 458 } 459 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 460 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 461 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 462 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 463 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 464 idiag += 25; 465 i2 += 5; 466 } 467 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 468 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 469 } 470 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 471 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 472 i2 = 0; 473 mdiag = a->idiag+25*a->mbs; 474 for (i=0; i<m; i++) { 475 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 476 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 477 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 478 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 479 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 480 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 481 mdiag += 25; 482 i2 += 5; 483 } 484 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 485 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 486 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 487 } 488 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 489 idiag = a->idiag+25*a->mbs - 25; 490 i2 = 5*m - 5; 491 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 492 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 493 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 494 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 495 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 496 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 497 idiag -= 25; 498 i2 -= 5; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 25*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 504 while (nz--) { 505 idx = 5*(*vi++); 506 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 507 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 508 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 509 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 510 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 511 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 512 v += 25; 513 } 514 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 515 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 516 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 517 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 518 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 519 idiag -= 25; 520 i2 -= 5; 521 } 522 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 523 } 524 } else { 525 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 526 } 527 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 528 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 529 PetscFunctionReturn(0); 530 } 531 532 /* 533 Special version for Fun3d sequential benchmark 534 */ 535 #if defined(PETSC_HAVE_FORTRAN_CAPS) 536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 538 #define matsetvaluesblocked4_ matsetvaluesblocked4 539 #endif 540 541 EXTERN_C_BEGIN 542 #undef __FUNCT__ 543 #define __FUNCT__ "matsetvaluesblocked4_" 544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 545 { 546 Mat A = *AA; 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 549 PetscInt *ai=a->i,*ailen=a->ilen; 550 PetscInt *aj=a->j,stepval; 551 const PetscScalar *value = v; 552 MatScalar *ap,*aa = a->a,*bap; 553 554 PetscFunctionBegin; 555 stepval = (n-1)*4; 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 rp = aj + ai[row]; 559 ap = aa + 16*ai[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 col = in[l]; 564 value = v + k*(stepval+4)*4 + l*4; 565 low = 0; high = nrow; 566 while (high-low > 7) { 567 t = (low+high)/2; 568 if (rp[t] > col) high = t; 569 else low = t; 570 } 571 for (i=low; i<high; i++) { 572 if (rp[i] > col) break; 573 if (rp[i] == col) { 574 bap = ap + 16*i; 575 for (ii=0; ii<4; ii++,value+=stepval) { 576 for (jj=ii; jj<16; jj+=4) { 577 bap[jj] += *value++; 578 } 579 } 580 goto noinsert2; 581 } 582 } 583 N = nrow++ - 1; 584 /* shift up all the later entries in this row */ 585 for (ii=N; ii>=i; ii--) { 586 rp[ii+1] = rp[ii]; 587 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 588 } 589 if (N >= i) { 590 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 591 } 592 rp[i] = col; 593 bap = ap + 16*i; 594 for (ii=0; ii<4; ii++,value+=stepval) { 595 for (jj=ii; jj<16; jj+=4) { 596 bap[jj] = *value++; 597 } 598 } 599 noinsert2:; 600 low = i; 601 } 602 ailen[row] = nrow; 603 } 604 } 605 EXTERN_C_END 606 607 #if defined(PETSC_HAVE_FORTRAN_CAPS) 608 #define matsetvalues4_ MATSETVALUES4 609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 610 #define matsetvalues4_ matsetvalues4 611 #endif 612 613 EXTERN_C_BEGIN 614 #undef __FUNCT__ 615 #define __FUNCT__ "MatSetValues4_" 616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 617 { 618 Mat A = *AA; 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 620 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 621 PetscInt *ai=a->i,*ailen=a->ilen; 622 PetscInt *aj=a->j,brow,bcol; 623 PetscInt ridx,cidx; 624 MatScalar *ap,value,*aa=a->a,*bap; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over added rows */ 628 row = im[k]; brow = row/4; 629 rp = aj + ai[brow]; 630 ap = aa + 16*ai[brow]; 631 nrow = ailen[brow]; 632 low = 0; 633 for (l=0; l<n; l++) { /* loop over added columns */ 634 col = in[l]; bcol = col/4; 635 ridx = row % 4; cidx = col % 4; 636 value = v[l + k*n]; 637 low = 0; high = nrow; 638 while (high-low > 7) { 639 t = (low+high)/2; 640 if (rp[t] > bcol) high = t; 641 else low = t; 642 } 643 for (i=low; i<high; i++) { 644 if (rp[i] > bcol) break; 645 if (rp[i] == bcol) { 646 bap = ap + 16*i + 4*cidx + ridx; 647 *bap += value; 648 goto noinsert1; 649 } 650 } 651 N = nrow++ - 1; 652 /* shift up all the later entries in this row */ 653 for (ii=N; ii>=i; ii--) { 654 rp[ii+1] = rp[ii]; 655 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 656 } 657 if (N>=i) { 658 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 659 } 660 rp[i] = bcol; 661 ap[16*i + 4*cidx + ridx] = value; 662 noinsert1:; 663 low = i; 664 } 665 ailen[brow] = nrow; 666 } 667 } 668 EXTERN_C_END 669 670 /* UGLY, ugly, ugly 671 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 672 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 673 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 674 converts the entries into single precision and then calls ..._MatScalar() to put them 675 into the single precision data structures. 676 */ 677 #if defined(PETSC_USE_MAT_SINGLE) 678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 679 #else 680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 681 #endif 682 683 #define CHUNKSIZE 10 684 685 /* 686 Checks for missing diagonals 687 */ 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt *diag,*jj = a->j,i; 695 696 PetscFunctionBegin; 697 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 698 diag = a->diag; 699 for (i=0; i<a->mbs; i++) { 700 if (jj[diag[i]] != i) { 701 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 702 } 703 } 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 710 { 711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscInt i,j,*diag,m = a->mbs; 714 715 PetscFunctionBegin; 716 if (a->diag) PetscFunctionReturn(0); 717 718 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 719 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 720 for (i=0; i<m; i++) { 721 diag[i] = a->i[i+1]; 722 for (j=a->i[i]; j<a->i[i+1]; j++) { 723 if (a->j[j] == i) { 724 diag[i] = j; 725 break; 726 } 727 } 728 } 729 a->diag = diag; 730 PetscFunctionReturn(0); 731 } 732 733 734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 739 { 740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 741 PetscErrorCode ierr; 742 PetscInt n = a->mbs,i; 743 744 PetscFunctionBegin; 745 *nn = n; 746 if (!ia) PetscFunctionReturn(0); 747 if (symmetric) { 748 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 749 } else if (oshift == 1) { 750 /* temporarily add 1 to i and j indices */ 751 PetscInt nz = a->i[n]; 752 for (i=0; i<nz; i++) a->j[i]++; 753 for (i=0; i<n+1; i++) a->i[i]++; 754 *ia = a->i; *ja = a->j; 755 } else { 756 *ia = a->i; *ja = a->j; 757 } 758 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 765 { 766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 767 PetscErrorCode ierr; 768 PetscInt i,n = a->mbs; 769 770 PetscFunctionBegin; 771 if (!ia) PetscFunctionReturn(0); 772 if (symmetric) { 773 ierr = PetscFree(*ia);CHKERRQ(ierr); 774 ierr = PetscFree(*ja);CHKERRQ(ierr); 775 } else if (oshift == 1) { 776 PetscInt nz = a->i[n]-1; 777 for (i=0; i<nz; i++) a->j[i]--; 778 for (i=0; i<n+1; i++) a->i[i]--; 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatDestroy_SeqBAIJ" 785 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 786 { 787 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 #if defined(PETSC_USE_LOG) 792 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 793 #endif 794 ierr = PetscFree(a->a);CHKERRQ(ierr); 795 if (!a->singlemalloc) { 796 ierr = PetscFree(a->i);CHKERRQ(ierr); 797 ierr = PetscFree(a->j);CHKERRQ(ierr); 798 } 799 if (a->row) { 800 ierr = ISDestroy(a->row);CHKERRQ(ierr); 801 } 802 if (a->col) { 803 ierr = ISDestroy(a->col);CHKERRQ(ierr); 804 } 805 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 806 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 807 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 808 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 809 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 810 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 811 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 812 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 813 #if defined(PETSC_USE_MAT_SINGLE) 814 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 815 #endif 816 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 817 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 818 819 ierr = PetscFree(a);CHKERRQ(ierr); 820 821 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 822 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 823 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 824 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 825 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 826 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatSetOption_SeqBAIJ" 832 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 833 { 834 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 switch (op) { 839 case MAT_ROW_ORIENTED: 840 a->roworiented = PETSC_TRUE; 841 break; 842 case MAT_COLUMN_ORIENTED: 843 a->roworiented = PETSC_FALSE; 844 break; 845 case MAT_COLUMNS_SORTED: 846 a->sorted = PETSC_TRUE; 847 break; 848 case MAT_COLUMNS_UNSORTED: 849 a->sorted = PETSC_FALSE; 850 break; 851 case MAT_KEEP_ZEROED_ROWS: 852 a->keepzeroedrows = PETSC_TRUE; 853 break; 854 case MAT_NO_NEW_NONZERO_LOCATIONS: 855 a->nonew = 1; 856 break; 857 case MAT_NEW_NONZERO_LOCATION_ERR: 858 a->nonew = -1; 859 break; 860 case MAT_NEW_NONZERO_ALLOCATION_ERR: 861 a->nonew = -2; 862 break; 863 case MAT_YES_NEW_NONZERO_LOCATIONS: 864 a->nonew = 0; 865 break; 866 case MAT_ROWS_SORTED: 867 case MAT_ROWS_UNSORTED: 868 case MAT_YES_NEW_DIAGONALS: 869 case MAT_IGNORE_OFF_PROC_ENTRIES: 870 case MAT_USE_HASH_TABLE: 871 ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 872 break; 873 case MAT_NO_NEW_DIAGONALS: 874 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 875 case MAT_SYMMETRIC: 876 case MAT_STRUCTURALLY_SYMMETRIC: 877 case MAT_NOT_SYMMETRIC: 878 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 879 case MAT_HERMITIAN: 880 case MAT_NOT_HERMITIAN: 881 case MAT_SYMMETRY_ETERNAL: 882 case MAT_NOT_SYMMETRY_ETERNAL: 883 break; 884 default: 885 SETERRQ(PETSC_ERR_SUP,"unknown option"); 886 } 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatGetRow_SeqBAIJ" 892 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 893 { 894 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 895 PetscErrorCode ierr; 896 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 897 MatScalar *aa,*aa_i; 898 PetscScalar *v_i; 899 900 PetscFunctionBegin; 901 bs = A->bs; 902 ai = a->i; 903 aj = a->j; 904 aa = a->a; 905 bs2 = a->bs2; 906 907 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 908 909 bn = row/bs; /* Block number */ 910 bp = row % bs; /* Block Position */ 911 M = ai[bn+1] - ai[bn]; 912 *nz = bs*M; 913 914 if (v) { 915 *v = 0; 916 if (*nz) { 917 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 918 for (i=0; i<M; i++) { /* for each block in the block row */ 919 v_i = *v + i*bs; 920 aa_i = aa + bs2*(ai[bn] + i); 921 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 922 } 923 } 924 } 925 926 if (idx) { 927 *idx = 0; 928 if (*nz) { 929 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 930 for (i=0; i<M; i++) { /* for each block in the block row */ 931 idx_i = *idx + i*bs; 932 itmp = bs*aj[ai[bn] + i]; 933 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 934 } 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 942 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 943 { 944 PetscErrorCode ierr; 945 946 PetscFunctionBegin; 947 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 948 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatTranspose_SeqBAIJ" 954 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 955 { 956 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 957 Mat C; 958 PetscErrorCode ierr; 959 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 960 PetscInt *rows,*cols,bs2=a->bs2; 961 PetscScalar *array; 962 963 PetscFunctionBegin; 964 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 965 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 966 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 967 968 #if defined(PETSC_USE_MAT_SINGLE) 969 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 970 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 971 #else 972 array = a->a; 973 #endif 974 975 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 976 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 977 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 978 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 979 ierr = PetscFree(col);CHKERRQ(ierr); 980 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 981 cols = rows + bs; 982 for (i=0; i<mbs; i++) { 983 cols[0] = i*bs; 984 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 985 len = ai[i+1] - ai[i]; 986 for (j=0; j<len; j++) { 987 rows[0] = (*aj++)*bs; 988 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 989 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 990 array += bs2; 991 } 992 } 993 ierr = PetscFree(rows);CHKERRQ(ierr); 994 #if defined(PETSC_USE_MAT_SINGLE) 995 ierr = PetscFree(array); 996 #endif 997 998 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 999 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1000 1001 if (B) { 1002 *B = C; 1003 } else { 1004 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1005 } 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1011 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1012 { 1013 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1014 PetscErrorCode ierr; 1015 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1016 int fd; 1017 PetscScalar *aa; 1018 FILE *file; 1019 1020 PetscFunctionBegin; 1021 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1022 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1023 col_lens[0] = MAT_FILE_COOKIE; 1024 1025 col_lens[1] = A->m; 1026 col_lens[2] = A->n; 1027 col_lens[3] = a->nz*bs2; 1028 1029 /* store lengths of each row and write (including header) to file */ 1030 count = 0; 1031 for (i=0; i<a->mbs; i++) { 1032 for (j=0; j<bs; j++) { 1033 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1034 } 1035 } 1036 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1037 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1038 1039 /* store column indices (zero start index) */ 1040 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1041 count = 0; 1042 for (i=0; i<a->mbs; i++) { 1043 for (j=0; j<bs; j++) { 1044 for (k=a->i[i]; k<a->i[i+1]; k++) { 1045 for (l=0; l<bs; l++) { 1046 jj[count++] = bs*a->j[k] + l; 1047 } 1048 } 1049 } 1050 } 1051 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1052 ierr = PetscFree(jj);CHKERRQ(ierr); 1053 1054 /* store nonzero values */ 1055 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1056 count = 0; 1057 for (i=0; i<a->mbs; i++) { 1058 for (j=0; j<bs; j++) { 1059 for (k=a->i[i]; k<a->i[i+1]; k++) { 1060 for (l=0; l<bs; l++) { 1061 aa[count++] = a->a[bs2*k + l*bs + j]; 1062 } 1063 } 1064 } 1065 } 1066 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1067 ierr = PetscFree(aa);CHKERRQ(ierr); 1068 1069 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1070 if (file) { 1071 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1078 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1079 { 1080 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1081 PetscErrorCode ierr; 1082 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1083 PetscViewerFormat format; 1084 1085 PetscFunctionBegin; 1086 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1087 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1088 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1089 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1090 Mat aij; 1091 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1092 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1093 ierr = MatDestroy(aij);CHKERRQ(ierr); 1094 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1095 PetscFunctionReturn(0); 1096 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1097 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1098 for (i=0; i<a->mbs; i++) { 1099 for (j=0; j<bs; j++) { 1100 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1101 for (k=a->i[i]; k<a->i[i+1]; k++) { 1102 for (l=0; l<bs; l++) { 1103 #if defined(PETSC_USE_COMPLEX) 1104 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1105 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1106 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1107 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1108 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1109 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1110 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1111 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1112 } 1113 #else 1114 if (a->a[bs2*k + l*bs + j] != 0.0) { 1115 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1116 } 1117 #endif 1118 } 1119 } 1120 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1121 } 1122 } 1123 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1124 } else { 1125 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1126 for (i=0; i<a->mbs; i++) { 1127 for (j=0; j<bs; j++) { 1128 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1129 for (k=a->i[i]; k<a->i[i+1]; k++) { 1130 for (l=0; l<bs; l++) { 1131 #if defined(PETSC_USE_COMPLEX) 1132 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1133 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1134 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1135 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1136 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1137 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1138 } else { 1139 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1140 } 1141 #else 1142 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1143 #endif 1144 } 1145 } 1146 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1147 } 1148 } 1149 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1150 } 1151 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1157 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1158 { 1159 Mat A = (Mat) Aa; 1160 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1161 PetscErrorCode ierr; 1162 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1163 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1164 MatScalar *aa; 1165 PetscViewer viewer; 1166 1167 PetscFunctionBegin; 1168 1169 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1170 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1171 1172 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1173 1174 /* loop over matrix elements drawing boxes */ 1175 color = PETSC_DRAW_BLUE; 1176 for (i=0,row=0; i<mbs; i++,row+=bs) { 1177 for (j=a->i[i]; j<a->i[i+1]; j++) { 1178 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1179 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1180 aa = a->a + j*bs2; 1181 for (k=0; k<bs; k++) { 1182 for (l=0; l<bs; l++) { 1183 if (PetscRealPart(*aa++) >= 0.) continue; 1184 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1185 } 1186 } 1187 } 1188 } 1189 color = PETSC_DRAW_CYAN; 1190 for (i=0,row=0; i<mbs; i++,row+=bs) { 1191 for (j=a->i[i]; j<a->i[i+1]; j++) { 1192 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1193 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1194 aa = a->a + j*bs2; 1195 for (k=0; k<bs; k++) { 1196 for (l=0; l<bs; l++) { 1197 if (PetscRealPart(*aa++) != 0.) continue; 1198 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1199 } 1200 } 1201 } 1202 } 1203 1204 color = PETSC_DRAW_RED; 1205 for (i=0,row=0; i<mbs; i++,row+=bs) { 1206 for (j=a->i[i]; j<a->i[i+1]; j++) { 1207 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1208 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1209 aa = a->a + j*bs2; 1210 for (k=0; k<bs; k++) { 1211 for (l=0; l<bs; l++) { 1212 if (PetscRealPart(*aa++) <= 0.) continue; 1213 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1214 } 1215 } 1216 } 1217 } 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1223 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1224 { 1225 PetscErrorCode ierr; 1226 PetscReal xl,yl,xr,yr,w,h; 1227 PetscDraw draw; 1228 PetscTruth isnull; 1229 1230 PetscFunctionBegin; 1231 1232 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1233 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1234 1235 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1236 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1237 xr += w; yr += h; xl = -w; yl = -h; 1238 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1239 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1240 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatView_SeqBAIJ" 1246 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1247 { 1248 PetscErrorCode ierr; 1249 PetscTruth iascii,isbinary,isdraw; 1250 1251 PetscFunctionBegin; 1252 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1253 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1254 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1255 if (iascii){ 1256 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1257 } else if (isbinary) { 1258 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1259 } else if (isdraw) { 1260 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1261 } else { 1262 Mat B; 1263 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1264 ierr = MatView(B,viewer);CHKERRQ(ierr); 1265 ierr = MatDestroy(B);CHKERRQ(ierr); 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1273 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1274 { 1275 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1276 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1277 PetscInt *ai = a->i,*ailen = a->ilen; 1278 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1279 MatScalar *ap,*aa = a->a,zero = 0.0; 1280 1281 PetscFunctionBegin; 1282 for (k=0; k<m; k++) { /* loop over rows */ 1283 row = im[k]; brow = row/bs; 1284 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1285 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1286 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1287 nrow = ailen[brow]; 1288 for (l=0; l<n; l++) { /* loop over columns */ 1289 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1290 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1291 col = in[l] ; 1292 bcol = col/bs; 1293 cidx = col%bs; 1294 ridx = row%bs; 1295 high = nrow; 1296 low = 0; /* assume unsorted */ 1297 while (high-low > 5) { 1298 t = (low+high)/2; 1299 if (rp[t] > bcol) high = t; 1300 else low = t; 1301 } 1302 for (i=low; i<high; i++) { 1303 if (rp[i] > bcol) break; 1304 if (rp[i] == bcol) { 1305 *v++ = ap[bs2*i+bs*cidx+ridx]; 1306 goto finished; 1307 } 1308 } 1309 *v++ = zero; 1310 finished:; 1311 } 1312 } 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #if defined(PETSC_USE_MAT_SINGLE) 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1319 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1320 { 1321 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1322 PetscErrorCode ierr; 1323 PetscInt i,N = m*n*b->bs2; 1324 MatScalar *vsingle; 1325 1326 PetscFunctionBegin; 1327 if (N > b->setvalueslen) { 1328 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1329 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1330 b->setvalueslen = N; 1331 } 1332 vsingle = b->setvaluescopy; 1333 for (i=0; i<N; i++) { 1334 vsingle[i] = v[i]; 1335 } 1336 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 #endif 1340 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1344 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1345 { 1346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1347 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1348 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1349 PetscErrorCode ierr; 1350 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1351 PetscTruth roworiented=a->roworiented; 1352 const MatScalar *value = v; 1353 MatScalar *ap,*aa = a->a,*bap; 1354 1355 PetscFunctionBegin; 1356 if (roworiented) { 1357 stepval = (n-1)*bs; 1358 } else { 1359 stepval = (m-1)*bs; 1360 } 1361 for (k=0; k<m; k++) { /* loop over added rows */ 1362 row = im[k]; 1363 if (row < 0) continue; 1364 #if defined(PETSC_USE_DEBUG) 1365 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1366 #endif 1367 rp = aj + ai[row]; 1368 ap = aa + bs2*ai[row]; 1369 rmax = imax[row]; 1370 nrow = ailen[row]; 1371 low = 0; 1372 high = nrow; 1373 for (l=0; l<n; l++) { /* loop over added columns */ 1374 if (in[l] < 0) continue; 1375 #if defined(PETSC_USE_DEBUG) 1376 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1377 #endif 1378 col = in[l]; 1379 if (roworiented) { 1380 value = v + k*(stepval+bs)*bs + l*bs; 1381 } else { 1382 value = v + l*(stepval+bs)*bs + k*bs; 1383 } 1384 if (col < lastcol) low = 0; else high = nrow; 1385 lastcol = col; 1386 while (high-low > 7) { 1387 t = (low+high)/2; 1388 if (rp[t] > col) high = t; 1389 else low = t; 1390 } 1391 for (i=low; i<high; i++) { 1392 if (rp[i] > col) break; 1393 if (rp[i] == col) { 1394 bap = ap + bs2*i; 1395 if (roworiented) { 1396 if (is == ADD_VALUES) { 1397 for (ii=0; ii<bs; ii++,value+=stepval) { 1398 for (jj=ii; jj<bs2; jj+=bs) { 1399 bap[jj] += *value++; 1400 } 1401 } 1402 } else { 1403 for (ii=0; ii<bs; ii++,value+=stepval) { 1404 for (jj=ii; jj<bs2; jj+=bs) { 1405 bap[jj] = *value++; 1406 } 1407 } 1408 } 1409 } else { 1410 if (is == ADD_VALUES) { 1411 for (ii=0; ii<bs; ii++,value+=stepval) { 1412 for (jj=0; jj<bs; jj++) { 1413 *bap++ += *value++; 1414 } 1415 } 1416 } else { 1417 for (ii=0; ii<bs; ii++,value+=stepval) { 1418 for (jj=0; jj<bs; jj++) { 1419 *bap++ = *value++; 1420 } 1421 } 1422 } 1423 } 1424 goto noinsert2; 1425 } 1426 } 1427 if (nonew == 1) goto noinsert2; 1428 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1429 if (nrow >= rmax) { 1430 /* there is no extra room in row, therefore enlarge */ 1431 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1432 MatScalar *new_a; 1433 1434 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1435 1436 /* malloc new storage space */ 1437 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1438 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1439 new_j = (PetscInt*)(new_a + bs2*new_nz); 1440 new_i = new_j + new_nz; 1441 1442 /* copy over old data into new slots */ 1443 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1444 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1445 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1446 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1447 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1448 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1449 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1450 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1451 /* free up old matrix storage */ 1452 ierr = PetscFree(a->a);CHKERRQ(ierr); 1453 if (!a->singlemalloc) { 1454 ierr = PetscFree(a->i);CHKERRQ(ierr); 1455 ierr = PetscFree(a->j);CHKERRQ(ierr); 1456 } 1457 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1458 a->singlemalloc = PETSC_TRUE; 1459 1460 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1461 rmax = imax[row] = imax[row] + CHUNKSIZE; 1462 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1463 a->maxnz += bs2*CHUNKSIZE; 1464 a->reallocs++; 1465 a->nz++; 1466 } 1467 N = nrow++ - 1; 1468 /* shift up all the later entries in this row */ 1469 for (ii=N; ii>=i; ii--) { 1470 rp[ii+1] = rp[ii]; 1471 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1472 } 1473 if (N >= i) { 1474 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1475 } 1476 rp[i] = col; 1477 bap = ap + bs2*i; 1478 if (roworiented) { 1479 for (ii=0; ii<bs; ii++,value+=stepval) { 1480 for (jj=ii; jj<bs2; jj+=bs) { 1481 bap[jj] = *value++; 1482 } 1483 } 1484 } else { 1485 for (ii=0; ii<bs; ii++,value+=stepval) { 1486 for (jj=0; jj<bs; jj++) { 1487 *bap++ = *value++; 1488 } 1489 } 1490 } 1491 noinsert2:; 1492 low = i; 1493 } 1494 ailen[row] = nrow; 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1501 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1502 { 1503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1504 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1505 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1506 PetscErrorCode ierr; 1507 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1508 MatScalar *aa = a->a,*ap; 1509 PetscReal ratio=0.6; 1510 1511 PetscFunctionBegin; 1512 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1513 1514 if (m) rmax = ailen[0]; 1515 for (i=1; i<mbs; i++) { 1516 /* move each row back by the amount of empty slots (fshift) before it*/ 1517 fshift += imax[i-1] - ailen[i-1]; 1518 rmax = PetscMax(rmax,ailen[i]); 1519 if (fshift) { 1520 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1521 N = ailen[i]; 1522 for (j=0; j<N; j++) { 1523 ip[j-fshift] = ip[j]; 1524 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1525 } 1526 } 1527 ai[i] = ai[i-1] + ailen[i-1]; 1528 } 1529 if (mbs) { 1530 fshift += imax[mbs-1] - ailen[mbs-1]; 1531 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1532 } 1533 /* reset ilen and imax for each row */ 1534 for (i=0; i<mbs; i++) { 1535 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1536 } 1537 a->nz = ai[mbs]; 1538 1539 /* diagonals may have moved, so kill the diagonal pointers */ 1540 a->idiagvalid = PETSC_FALSE; 1541 if (fshift && a->diag) { 1542 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1543 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1544 a->diag = 0; 1545 } 1546 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1547 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1548 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1549 a->reallocs = 0; 1550 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1551 1552 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1553 if (a->compressedrow.use){ 1554 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1555 } 1556 1557 A->same_nonzero = PETSC_TRUE; 1558 PetscFunctionReturn(0); 1559 } 1560 1561 /* 1562 This function returns an array of flags which indicate the locations of contiguous 1563 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1564 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1565 Assume: sizes should be long enough to hold all the values. 1566 */ 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1569 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1570 { 1571 PetscInt i,j,k,row; 1572 PetscTruth flg; 1573 1574 PetscFunctionBegin; 1575 for (i=0,j=0; i<n; j++) { 1576 row = idx[i]; 1577 if (row%bs!=0) { /* Not the begining of a block */ 1578 sizes[j] = 1; 1579 i++; 1580 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1581 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1582 i++; 1583 } else { /* Begining of the block, so check if the complete block exists */ 1584 flg = PETSC_TRUE; 1585 for (k=1; k<bs; k++) { 1586 if (row+k != idx[i+k]) { /* break in the block */ 1587 flg = PETSC_FALSE; 1588 break; 1589 } 1590 } 1591 if (flg) { /* No break in the bs */ 1592 sizes[j] = bs; 1593 i+= bs; 1594 } else { 1595 sizes[j] = 1; 1596 i++; 1597 } 1598 } 1599 } 1600 *bs_max = j; 1601 PetscFunctionReturn(0); 1602 } 1603 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1606 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1607 { 1608 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1609 PetscErrorCode ierr; 1610 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1611 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1612 PetscScalar zero = 0.0; 1613 MatScalar *aa; 1614 1615 PetscFunctionBegin; 1616 /* Make a copy of the IS and sort it */ 1617 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1618 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1619 1620 /* allocate memory for rows,sizes */ 1621 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1622 sizes = rows + is_n; 1623 1624 /* copy IS values to rows, and sort them */ 1625 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1626 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1627 if (baij->keepzeroedrows) { 1628 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1629 bs_max = is_n; 1630 A->same_nonzero = PETSC_TRUE; 1631 } else { 1632 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1633 A->same_nonzero = PETSC_FALSE; 1634 } 1635 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1636 1637 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1638 row = rows[j]; 1639 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1640 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1641 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1642 if (sizes[i] == bs && !baij->keepzeroedrows) { 1643 if (diag) { 1644 if (baij->ilen[row/bs] > 0) { 1645 baij->ilen[row/bs] = 1; 1646 baij->j[baij->i[row/bs]] = row/bs; 1647 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1648 } 1649 /* Now insert all the diagonal values for this bs */ 1650 for (k=0; k<bs; k++) { 1651 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 } else { /* (!diag) */ 1654 baij->ilen[row/bs] = 0; 1655 } /* end (!diag) */ 1656 } else { /* (sizes[i] != bs) */ 1657 #if defined (PETSC_USE_DEBUG) 1658 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1659 #endif 1660 for (k=0; k<count; k++) { 1661 aa[0] = zero; 1662 aa += bs; 1663 } 1664 if (diag) { 1665 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1666 } 1667 } 1668 } 1669 1670 ierr = PetscFree(rows);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1678 { 1679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1680 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1681 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1682 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1683 PetscErrorCode ierr; 1684 PetscInt ridx,cidx,bs2=a->bs2; 1685 PetscTruth roworiented=a->roworiented; 1686 MatScalar *ap,value,*aa=a->a,*bap; 1687 1688 PetscFunctionBegin; 1689 for (k=0; k<m; k++) { /* loop over added rows */ 1690 row = im[k]; brow = row/bs; 1691 if (row < 0) continue; 1692 #if defined(PETSC_USE_DEBUG) 1693 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1694 #endif 1695 rp = aj + ai[brow]; 1696 ap = aa + bs2*ai[brow]; 1697 rmax = imax[brow]; 1698 nrow = ailen[brow]; 1699 low = 0; 1700 high = nrow; 1701 for (l=0; l<n; l++) { /* loop over added columns */ 1702 if (in[l] < 0) continue; 1703 #if defined(PETSC_USE_DEBUG) 1704 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1705 #endif 1706 col = in[l]; bcol = col/bs; 1707 ridx = row % bs; cidx = col % bs; 1708 if (roworiented) { 1709 value = v[l + k*n]; 1710 } else { 1711 value = v[k + l*m]; 1712 } 1713 if (col < lastcol) low = 0; else high = nrow; 1714 lastcol = col; 1715 while (high-low > 7) { 1716 t = (low+high)/2; 1717 if (rp[t] > bcol) high = t; 1718 else low = t; 1719 } 1720 for (i=low; i<high; i++) { 1721 if (rp[i] > bcol) break; 1722 if (rp[i] == bcol) { 1723 bap = ap + bs2*i + bs*cidx + ridx; 1724 if (is == ADD_VALUES) *bap += value; 1725 else *bap = value; 1726 goto noinsert1; 1727 } 1728 } 1729 if (nonew == 1) goto noinsert1; 1730 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1731 if (nrow >= rmax) { 1732 /* there is no extra room in row, therefore enlarge */ 1733 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1734 MatScalar *new_a; 1735 1736 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1737 1738 /* Malloc new storage space */ 1739 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1740 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1741 new_j = (PetscInt*)(new_a + bs2*new_nz); 1742 new_i = new_j + new_nz; 1743 1744 /* copy over old data into new slots */ 1745 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1746 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1747 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1748 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1749 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1750 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1751 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1752 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1753 /* free up old matrix storage */ 1754 ierr = PetscFree(a->a);CHKERRQ(ierr); 1755 if (!a->singlemalloc) { 1756 ierr = PetscFree(a->i);CHKERRQ(ierr); 1757 ierr = PetscFree(a->j);CHKERRQ(ierr); 1758 } 1759 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1760 a->singlemalloc = PETSC_TRUE; 1761 1762 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1763 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1764 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1765 a->maxnz += bs2*CHUNKSIZE; 1766 a->reallocs++; 1767 a->nz++; 1768 } 1769 N = nrow++ - 1; 1770 /* shift up all the later entries in this row */ 1771 for (ii=N; ii>=i; ii--) { 1772 rp[ii+1] = rp[ii]; 1773 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1774 } 1775 if (N>=i) { 1776 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1777 } 1778 rp[i] = bcol; 1779 ap[bs2*i + bs*cidx + ridx] = value; 1780 noinsert1:; 1781 low = i; 1782 } 1783 ailen[brow] = nrow; 1784 } 1785 A->same_nonzero = PETSC_FALSE; 1786 PetscFunctionReturn(0); 1787 } 1788 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1792 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1793 { 1794 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1795 Mat outA; 1796 PetscErrorCode ierr; 1797 PetscTruth row_identity,col_identity; 1798 1799 PetscFunctionBegin; 1800 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1801 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1802 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1803 if (!row_identity || !col_identity) { 1804 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1805 } 1806 1807 outA = inA; 1808 inA->factor = FACTOR_LU; 1809 1810 if (!a->diag) { 1811 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1812 } 1813 1814 a->row = row; 1815 a->col = col; 1816 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1817 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1818 1819 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1820 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1821 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1822 1823 /* 1824 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1825 for ILU(0) factorization with natural ordering 1826 */ 1827 if (inA->bs < 8) { 1828 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1829 } else { 1830 if (!a->solve_work) { 1831 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1832 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1833 } 1834 } 1835 1836 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1837 1838 PetscFunctionReturn(0); 1839 } 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1842 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1843 { 1844 static PetscTruth called = PETSC_FALSE; 1845 MPI_Comm comm = A->comm; 1846 PetscErrorCode ierr; 1847 1848 PetscFunctionBegin; 1849 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1850 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1851 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1852 PetscFunctionReturn(0); 1853 } 1854 1855 EXTERN_C_BEGIN 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1858 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1859 { 1860 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1861 PetscInt i,nz,nbs; 1862 1863 PetscFunctionBegin; 1864 nz = baij->maxnz/baij->bs2; 1865 nbs = baij->nbs; 1866 for (i=0; i<nz; i++) { 1867 baij->j[i] = indices[i]; 1868 } 1869 baij->nz = nz; 1870 for (i=0; i<nbs; i++) { 1871 baij->ilen[i] = baij->imax[i]; 1872 } 1873 1874 PetscFunctionReturn(0); 1875 } 1876 EXTERN_C_END 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1880 /*@ 1881 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1882 in the matrix. 1883 1884 Input Parameters: 1885 + mat - the SeqBAIJ matrix 1886 - indices - the column indices 1887 1888 Level: advanced 1889 1890 Notes: 1891 This can be called if you have precomputed the nonzero structure of the 1892 matrix and want to provide it to the matrix object to improve the performance 1893 of the MatSetValues() operation. 1894 1895 You MUST have set the correct numbers of nonzeros per row in the call to 1896 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1897 1898 MUST be called before any calls to MatSetValues(); 1899 1900 @*/ 1901 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1902 { 1903 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1904 1905 PetscFunctionBegin; 1906 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1907 PetscValidPointer(indices,2); 1908 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1909 if (f) { 1910 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1911 } else { 1912 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1919 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1920 { 1921 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1922 PetscErrorCode ierr; 1923 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1924 PetscReal atmp; 1925 PetscScalar *x,zero = 0.0; 1926 MatScalar *aa; 1927 PetscInt ncols,brow,krow,kcol; 1928 1929 PetscFunctionBegin; 1930 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1931 bs = A->bs; 1932 aa = a->a; 1933 ai = a->i; 1934 aj = a->j; 1935 mbs = a->mbs; 1936 1937 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1938 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1939 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1940 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1941 for (i=0; i<mbs; i++) { 1942 ncols = ai[1] - ai[0]; ai++; 1943 brow = bs*i; 1944 for (j=0; j<ncols; j++){ 1945 /* bcol = bs*(*aj); */ 1946 for (kcol=0; kcol<bs; kcol++){ 1947 for (krow=0; krow<bs; krow++){ 1948 atmp = PetscAbsScalar(*aa); aa++; 1949 row = brow + krow; /* row index */ 1950 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1951 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1952 } 1953 } 1954 aj++; 1955 } 1956 } 1957 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1963 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1964 { 1965 PetscErrorCode ierr; 1966 1967 PetscFunctionBegin; 1968 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1974 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1975 { 1976 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1977 PetscFunctionBegin; 1978 *array = a->a; 1979 PetscFunctionReturn(0); 1980 } 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1984 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1985 { 1986 PetscFunctionBegin; 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #include "petscblaslapack.h" 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1993 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1994 { 1995 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1996 PetscErrorCode ierr; 1997 PetscInt i,bs=Y->bs,j,bs2; 1998 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1999 2000 PetscFunctionBegin; 2001 if (str == SAME_NONZERO_PATTERN) { 2002 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2003 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2004 if (y->xtoy && y->XtoY != X) { 2005 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2006 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2007 } 2008 if (!y->xtoy) { /* get xtoy */ 2009 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2010 y->XtoY = X; 2011 } 2012 bs2 = bs*bs; 2013 for (i=0; i<x->nz; i++) { 2014 j = 0; 2015 while (j < bs2){ 2016 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2017 j++; 2018 } 2019 } 2020 ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2021 } else { 2022 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2023 } 2024 PetscFunctionReturn(0); 2025 } 2026 2027 /* -------------------------------------------------------------------*/ 2028 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2029 MatGetRow_SeqBAIJ, 2030 MatRestoreRow_SeqBAIJ, 2031 MatMult_SeqBAIJ_N, 2032 /* 4*/ MatMultAdd_SeqBAIJ_N, 2033 MatMultTranspose_SeqBAIJ, 2034 MatMultTransposeAdd_SeqBAIJ, 2035 MatSolve_SeqBAIJ_N, 2036 0, 2037 0, 2038 /*10*/ 0, 2039 MatLUFactor_SeqBAIJ, 2040 0, 2041 0, 2042 MatTranspose_SeqBAIJ, 2043 /*15*/ MatGetInfo_SeqBAIJ, 2044 MatEqual_SeqBAIJ, 2045 MatGetDiagonal_SeqBAIJ, 2046 MatDiagonalScale_SeqBAIJ, 2047 MatNorm_SeqBAIJ, 2048 /*20*/ 0, 2049 MatAssemblyEnd_SeqBAIJ, 2050 0, 2051 MatSetOption_SeqBAIJ, 2052 MatZeroEntries_SeqBAIJ, 2053 /*25*/ MatZeroRows_SeqBAIJ, 2054 MatLUFactorSymbolic_SeqBAIJ, 2055 MatLUFactorNumeric_SeqBAIJ_N, 2056 MatCholeskyFactorSymbolic_SeqBAIJ, 2057 MatCholeskyFactorNumeric_SeqBAIJ_N, 2058 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2059 MatILUFactorSymbolic_SeqBAIJ, 2060 MatICCFactorSymbolic_SeqBAIJ, 2061 MatGetArray_SeqBAIJ, 2062 MatRestoreArray_SeqBAIJ, 2063 /*35*/ MatDuplicate_SeqBAIJ, 2064 0, 2065 0, 2066 MatILUFactor_SeqBAIJ, 2067 0, 2068 /*40*/ MatAXPY_SeqBAIJ, 2069 MatGetSubMatrices_SeqBAIJ, 2070 MatIncreaseOverlap_SeqBAIJ, 2071 MatGetValues_SeqBAIJ, 2072 0, 2073 /*45*/ MatPrintHelp_SeqBAIJ, 2074 MatScale_SeqBAIJ, 2075 0, 2076 0, 2077 0, 2078 /*50*/ 0, 2079 MatGetRowIJ_SeqBAIJ, 2080 MatRestoreRowIJ_SeqBAIJ, 2081 0, 2082 0, 2083 /*55*/ 0, 2084 0, 2085 0, 2086 0, 2087 MatSetValuesBlocked_SeqBAIJ, 2088 /*60*/ MatGetSubMatrix_SeqBAIJ, 2089 MatDestroy_SeqBAIJ, 2090 MatView_SeqBAIJ, 2091 MatGetPetscMaps_Petsc, 2092 0, 2093 /*65*/ 0, 2094 0, 2095 0, 2096 0, 2097 0, 2098 /*70*/ MatGetRowMax_SeqBAIJ, 2099 MatConvert_Basic, 2100 0, 2101 0, 2102 0, 2103 /*75*/ 0, 2104 0, 2105 0, 2106 0, 2107 0, 2108 /*80*/ 0, 2109 0, 2110 0, 2111 0, 2112 MatLoad_SeqBAIJ, 2113 /*85*/ 0, 2114 0, 2115 0, 2116 0, 2117 0, 2118 /*90*/ 0, 2119 0, 2120 0, 2121 0, 2122 0, 2123 /*95*/ 0, 2124 0, 2125 0, 2126 0}; 2127 2128 EXTERN_C_BEGIN 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2131 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2132 { 2133 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2134 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 if (aij->nonew != 1) { 2139 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2140 } 2141 2142 /* allocate space for values if not already there */ 2143 if (!aij->saved_values) { 2144 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2145 } 2146 2147 /* copy values over */ 2148 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 EXTERN_C_END 2152 2153 EXTERN_C_BEGIN 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2156 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2157 { 2158 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2159 PetscErrorCode ierr; 2160 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2161 2162 PetscFunctionBegin; 2163 if (aij->nonew != 1) { 2164 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2165 } 2166 if (!aij->saved_values) { 2167 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2168 } 2169 2170 /* copy values over */ 2171 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2172 PetscFunctionReturn(0); 2173 } 2174 EXTERN_C_END 2175 2176 EXTERN_C_BEGIN 2177 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 2178 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*); 2179 EXTERN_C_END 2180 2181 EXTERN_C_BEGIN 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2184 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2185 { 2186 Mat_SeqBAIJ *b; 2187 PetscErrorCode ierr; 2188 PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2189 PetscTruth flg; 2190 2191 PetscFunctionBegin; 2192 2193 B->preallocated = PETSC_TRUE; 2194 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2195 if (nnz && newbs != bs) { 2196 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2197 } 2198 bs = newbs; 2199 2200 mbs = B->m/bs; 2201 nbs = B->n/bs; 2202 bs2 = bs*bs; 2203 2204 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2205 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2206 } 2207 2208 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2209 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2210 if (nnz) { 2211 for (i=0; i<mbs; i++) { 2212 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2213 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2214 } 2215 } 2216 2217 b = (Mat_SeqBAIJ*)B->data; 2218 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2219 B->ops->solve = MatSolve_SeqBAIJ_Update; 2220 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2221 if (!flg) { 2222 switch (bs) { 2223 case 1: 2224 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2225 B->ops->mult = MatMult_SeqBAIJ_1; 2226 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2227 break; 2228 case 2: 2229 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2230 B->ops->mult = MatMult_SeqBAIJ_2; 2231 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2232 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2233 break; 2234 case 3: 2235 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2236 B->ops->mult = MatMult_SeqBAIJ_3; 2237 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2238 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2239 break; 2240 case 4: 2241 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2242 B->ops->mult = MatMult_SeqBAIJ_4; 2243 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2244 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2245 break; 2246 case 5: 2247 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2248 B->ops->mult = MatMult_SeqBAIJ_5; 2249 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2250 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2251 break; 2252 case 6: 2253 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2254 B->ops->mult = MatMult_SeqBAIJ_6; 2255 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2256 break; 2257 case 7: 2258 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2259 B->ops->mult = MatMult_SeqBAIJ_7; 2260 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2261 break; 2262 default: 2263 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2264 B->ops->mult = MatMult_SeqBAIJ_N; 2265 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2266 break; 2267 } 2268 } 2269 B->bs = bs; 2270 b->mbs = mbs; 2271 b->nbs = nbs; 2272 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2273 if (!nnz) { 2274 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2275 else if (nz <= 0) nz = 1; 2276 for (i=0; i<mbs; i++) b->imax[i] = nz; 2277 nz = nz*mbs; 2278 } else { 2279 nz = 0; 2280 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2281 } 2282 2283 /* allocate the matrix space */ 2284 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2285 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2286 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2287 b->j = (PetscInt*)(b->a + nz*bs2); 2288 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2289 b->i = b->j + nz; 2290 b->singlemalloc = PETSC_TRUE; 2291 2292 b->i[0] = 0; 2293 for (i=1; i<mbs+1; i++) { 2294 b->i[i] = b->i[i-1] + b->imax[i-1]; 2295 } 2296 2297 /* b->ilen will count nonzeros in each block row so far. */ 2298 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2299 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2300 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2301 2302 B->bs = bs; 2303 b->bs2 = bs2; 2304 b->mbs = mbs; 2305 b->nz = 0; 2306 b->maxnz = nz*bs2; 2307 B->info.nz_unneeded = (PetscReal)b->maxnz; 2308 PetscFunctionReturn(0); 2309 } 2310 EXTERN_C_END 2311 2312 /*MC 2313 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2314 block sparse compressed row format. 2315 2316 Options Database Keys: 2317 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2318 2319 Level: beginner 2320 2321 .seealso: MatCreateSeqBAIJ 2322 M*/ 2323 2324 EXTERN_C_BEGIN 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatCreate_SeqBAIJ" 2327 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2328 { 2329 PetscErrorCode ierr; 2330 PetscMPIInt size; 2331 Mat_SeqBAIJ *b; 2332 2333 PetscFunctionBegin; 2334 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2335 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2336 2337 B->m = B->M = PetscMax(B->m,B->M); 2338 B->n = B->N = PetscMax(B->n,B->N); 2339 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2340 B->data = (void*)b; 2341 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2342 B->factor = 0; 2343 B->mapping = 0; 2344 b->row = 0; 2345 b->col = 0; 2346 b->icol = 0; 2347 b->reallocs = 0; 2348 b->saved_values = 0; 2349 #if defined(PETSC_USE_MAT_SINGLE) 2350 b->setvalueslen = 0; 2351 b->setvaluescopy = PETSC_NULL; 2352 #endif 2353 2354 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2355 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2356 2357 b->sorted = PETSC_FALSE; 2358 b->roworiented = PETSC_TRUE; 2359 b->nonew = 0; 2360 b->diag = 0; 2361 b->solve_work = 0; 2362 b->mult_work = 0; 2363 B->spptr = 0; 2364 B->info.nz_unneeded = (PetscReal)b->maxnz; 2365 b->keepzeroedrows = PETSC_FALSE; 2366 b->xtoy = 0; 2367 b->XtoY = 0; 2368 b->compressedrow.use = PETSC_FALSE; 2369 b->compressedrow.nrows = 0; 2370 b->compressedrow.i = PETSC_NULL; 2371 b->compressedrow.rindex = PETSC_NULL; 2372 b->compressedrow.checked = PETSC_FALSE; 2373 B->same_nonzero = PETSC_FALSE; 2374 2375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2376 "MatStoreValues_SeqBAIJ", 2377 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2379 "MatRetrieveValues_SeqBAIJ", 2380 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2381 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2382 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2383 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2384 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2385 "MatConvert_SeqBAIJ_SeqAIJ", 2386 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2387 #if !defined(PETSC_USE_64BIT_INT) 2388 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2389 "MatConvert_SeqBAIJ_SeqSBAIJ", 2390 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2391 #endif 2392 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2393 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2394 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2395 PetscFunctionReturn(0); 2396 } 2397 EXTERN_C_END 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2401 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2402 { 2403 Mat C; 2404 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2405 PetscErrorCode ierr; 2406 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2407 2408 PetscFunctionBegin; 2409 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2410 2411 *B = 0; 2412 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2413 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2414 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2415 c = (Mat_SeqBAIJ*)C->data; 2416 2417 C->M = A->M; 2418 C->N = A->N; 2419 C->bs = A->bs; 2420 c->bs2 = a->bs2; 2421 c->mbs = a->mbs; 2422 c->nbs = a->nbs; 2423 2424 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2425 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2426 for (i=0; i<mbs; i++) { 2427 c->imax[i] = a->imax[i]; 2428 c->ilen[i] = a->ilen[i]; 2429 } 2430 2431 /* allocate the matrix space */ 2432 c->singlemalloc = PETSC_TRUE; 2433 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2434 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2435 c->j = (PetscInt*)(c->a + nz*bs2); 2436 c->i = c->j + nz; 2437 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2438 if (mbs > 0) { 2439 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2440 if (cpvalues == MAT_COPY_VALUES) { 2441 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2442 } else { 2443 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2444 } 2445 } 2446 2447 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2448 c->sorted = a->sorted; 2449 c->roworiented = a->roworiented; 2450 c->nonew = a->nonew; 2451 2452 if (a->diag) { 2453 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2454 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2455 for (i=0; i<mbs; i++) { 2456 c->diag[i] = a->diag[i]; 2457 } 2458 } else c->diag = 0; 2459 c->nz = a->nz; 2460 c->maxnz = a->maxnz; 2461 c->solve_work = 0; 2462 c->mult_work = 0; 2463 C->preallocated = PETSC_TRUE; 2464 C->assembled = PETSC_TRUE; 2465 2466 c->compressedrow.use = a->compressedrow.use; 2467 c->compressedrow.nrows = a->compressedrow.nrows; 2468 c->compressedrow.checked = a->compressedrow.checked; 2469 if ( a->compressedrow.checked && a->compressedrow.use){ 2470 i = a->compressedrow.nrows; 2471 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2472 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2473 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2474 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2475 } else { 2476 c->compressedrow.use = PETSC_FALSE; 2477 c->compressedrow.i = PETSC_NULL; 2478 c->compressedrow.rindex = PETSC_NULL; 2479 } 2480 C->same_nonzero = A->same_nonzero; 2481 *B = C; 2482 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatLoad_SeqBAIJ" 2488 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2489 { 2490 Mat_SeqBAIJ *a; 2491 Mat B; 2492 PetscErrorCode ierr; 2493 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2494 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2495 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2496 PetscInt *masked,nmask,tmp,bs2,ishift; 2497 PetscMPIInt size; 2498 int fd; 2499 PetscScalar *aa; 2500 MPI_Comm comm = ((PetscObject)viewer)->comm; 2501 2502 PetscFunctionBegin; 2503 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2504 bs2 = bs*bs; 2505 2506 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2507 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2508 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2509 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2510 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2511 M = header[1]; N = header[2]; nz = header[3]; 2512 2513 if (header[3] < 0) { 2514 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2515 } 2516 2517 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2518 2519 /* 2520 This code adds extra rows to make sure the number of rows is 2521 divisible by the blocksize 2522 */ 2523 mbs = M/bs; 2524 extra_rows = bs - M + bs*(mbs); 2525 if (extra_rows == bs) extra_rows = 0; 2526 else mbs++; 2527 if (extra_rows) { 2528 ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2529 } 2530 2531 /* read in row lengths */ 2532 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2533 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2534 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2535 2536 /* read in column indices */ 2537 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2538 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2539 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2540 2541 /* loop over row lengths determining block row lengths */ 2542 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2543 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2544 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2545 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2546 masked = mask + mbs; 2547 rowcount = 0; nzcount = 0; 2548 for (i=0; i<mbs; i++) { 2549 nmask = 0; 2550 for (j=0; j<bs; j++) { 2551 kmax = rowlengths[rowcount]; 2552 for (k=0; k<kmax; k++) { 2553 tmp = jj[nzcount++]/bs; 2554 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2555 } 2556 rowcount++; 2557 } 2558 browlengths[i] += nmask; 2559 /* zero out the mask elements we set */ 2560 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2561 } 2562 2563 /* create our matrix */ 2564 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2565 ierr = MatSetType(B,type);CHKERRQ(ierr); 2566 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2567 a = (Mat_SeqBAIJ*)B->data; 2568 2569 /* set matrix "i" values */ 2570 a->i[0] = 0; 2571 for (i=1; i<= mbs; i++) { 2572 a->i[i] = a->i[i-1] + browlengths[i-1]; 2573 a->ilen[i-1] = browlengths[i-1]; 2574 } 2575 a->nz = 0; 2576 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2577 2578 /* read in nonzero values */ 2579 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2580 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2581 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2582 2583 /* set "a" and "j" values into matrix */ 2584 nzcount = 0; jcount = 0; 2585 for (i=0; i<mbs; i++) { 2586 nzcountb = nzcount; 2587 nmask = 0; 2588 for (j=0; j<bs; j++) { 2589 kmax = rowlengths[i*bs+j]; 2590 for (k=0; k<kmax; k++) { 2591 tmp = jj[nzcount++]/bs; 2592 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2593 } 2594 } 2595 /* sort the masked values */ 2596 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2597 2598 /* set "j" values into matrix */ 2599 maskcount = 1; 2600 for (j=0; j<nmask; j++) { 2601 a->j[jcount++] = masked[j]; 2602 mask[masked[j]] = maskcount++; 2603 } 2604 /* set "a" values into matrix */ 2605 ishift = bs2*a->i[i]; 2606 for (j=0; j<bs; j++) { 2607 kmax = rowlengths[i*bs+j]; 2608 for (k=0; k<kmax; k++) { 2609 tmp = jj[nzcountb]/bs ; 2610 block = mask[tmp] - 1; 2611 point = jj[nzcountb] - bs*tmp; 2612 idx = ishift + bs2*block + j + bs*point; 2613 a->a[idx] = (MatScalar)aa[nzcountb++]; 2614 } 2615 } 2616 /* zero out the mask elements we set */ 2617 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2618 } 2619 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2620 2621 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2622 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2623 ierr = PetscFree(aa);CHKERRQ(ierr); 2624 ierr = PetscFree(jj);CHKERRQ(ierr); 2625 ierr = PetscFree(mask);CHKERRQ(ierr); 2626 2627 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2628 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2629 ierr = MatView_Private(B);CHKERRQ(ierr); 2630 2631 *A = B; 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "MatCreateSeqBAIJ" 2637 /*@C 2638 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2639 compressed row) format. For good matrix assembly performance the 2640 user should preallocate the matrix storage by setting the parameter nz 2641 (or the array nnz). By setting these parameters accurately, performance 2642 during matrix assembly can be increased by more than a factor of 50. 2643 2644 Collective on MPI_Comm 2645 2646 Input Parameters: 2647 + comm - MPI communicator, set to PETSC_COMM_SELF 2648 . bs - size of block 2649 . m - number of rows 2650 . n - number of columns 2651 . nz - number of nonzero blocks per block row (same for all rows) 2652 - nnz - array containing the number of nonzero blocks in the various block rows 2653 (possibly different for each block row) or PETSC_NULL 2654 2655 Output Parameter: 2656 . A - the matrix 2657 2658 Options Database Keys: 2659 . -mat_no_unroll - uses code that does not unroll the loops in the 2660 block calculations (much slower) 2661 . -mat_block_size - size of the blocks to use 2662 2663 Level: intermediate 2664 2665 Notes: 2666 The number of rows and columns must be divisible by blocksize. 2667 2668 If the nnz parameter is given then the nz parameter is ignored 2669 2670 A nonzero block is any block that as 1 or more nonzeros in it 2671 2672 The block AIJ format is fully compatible with standard Fortran 77 2673 storage. That is, the stored row and column indices can begin at 2674 either one (as in Fortran) or zero. See the users' manual for details. 2675 2676 Specify the preallocated storage with either nz or nnz (not both). 2677 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2678 allocation. For additional details, see the users manual chapter on 2679 matrices. 2680 2681 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2682 @*/ 2683 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2684 { 2685 PetscErrorCode ierr; 2686 2687 PetscFunctionBegin; 2688 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2689 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2690 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2696 /*@C 2697 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2698 per row in the matrix. For good matrix assembly performance the 2699 user should preallocate the matrix storage by setting the parameter nz 2700 (or the array nnz). By setting these parameters accurately, performance 2701 during matrix assembly can be increased by more than a factor of 50. 2702 2703 Collective on MPI_Comm 2704 2705 Input Parameters: 2706 + A - the matrix 2707 . bs - size of block 2708 . nz - number of block nonzeros per block row (same for all rows) 2709 - nnz - array containing the number of block nonzeros in the various block rows 2710 (possibly different for each block row) or PETSC_NULL 2711 2712 Options Database Keys: 2713 . -mat_no_unroll - uses code that does not unroll the loops in the 2714 block calculations (much slower) 2715 . -mat_block_size - size of the blocks to use 2716 2717 Level: intermediate 2718 2719 Notes: 2720 If the nnz parameter is given then the nz parameter is ignored 2721 2722 The block AIJ format is fully compatible with standard Fortran 77 2723 storage. That is, the stored row and column indices can begin at 2724 either one (as in Fortran) or zero. See the users' manual for details. 2725 2726 Specify the preallocated storage with either nz or nnz (not both). 2727 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2728 allocation. For additional details, see the users manual chapter on 2729 matrices. 2730 2731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2732 @*/ 2733 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2734 { 2735 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2736 2737 PetscFunctionBegin; 2738 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2739 if (f) { 2740 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2741 } 2742 PetscFunctionReturn(0); 2743 } 2744 2745