1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 PetscErrorCode ierr; 19 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 20 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 21 22 PetscFunctionBegin; 23 if (a->idiagvalid) PetscFunctionReturn(0); 24 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 25 diag_offset = a->diag; 26 if (!a->idiag) { 27 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 28 } 29 diag = a->idiag; 30 mdiag = a->idiag+bs*bs*mbs; 31 /* factor and invert each block */ 32 switch (bs){ 33 case 2: 34 for (i=0; i<mbs; i++) { 35 odiag = v + 4*diag_offset[i]; 36 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 37 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 38 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 39 diag += 4; 40 mdiag += 4; 41 } 42 break; 43 case 3: 44 for (i=0; i<mbs; i++) { 45 odiag = v + 9*diag_offset[i]; 46 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 47 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 48 diag[8] = odiag[8]; 49 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 50 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 51 mdiag[8] = odiag[8]; 52 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 53 diag += 9; 54 mdiag += 9; 55 } 56 break; 57 case 4: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 16*diag_offset[i]; 60 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 61 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 62 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 63 diag += 16; 64 mdiag += 16; 65 } 66 break; 67 case 5: 68 for (i=0; i<mbs; i++) { 69 odiag = v + 25*diag_offset[i]; 70 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 71 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 72 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 73 diag += 25; 74 mdiag += 25; 75 } 76 break; 77 default: 78 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 79 } 80 a->idiagvalid = PETSC_TRUE; 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 86 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 87 { 88 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 89 PetscScalar *x,x1,x2,s1,s2; 90 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 91 PetscErrorCode ierr; 92 PetscInt m = a->mbs,i,i2,nz,idx; 93 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 94 95 PetscFunctionBegin; 96 its = its*lits; 97 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 98 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 99 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 100 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 101 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 102 103 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 104 105 diag = a->diag; 106 idiag = a->idiag; 107 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 108 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 109 110 if (flag & SOR_ZERO_INITIAL_GUESS) { 111 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 112 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 113 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 114 i2 = 2; 115 idiag += 4; 116 for (i=1; i<m; i++) { 117 v = aa + 4*ai[i]; 118 vi = aj + ai[i]; 119 nz = diag[i] - ai[i]; 120 s1 = b[i2]; s2 = b[i2+1]; 121 while (nz--) { 122 idx = 2*(*vi++); 123 x1 = x[idx]; x2 = x[1+idx]; 124 s1 -= v[0]*x1 + v[2]*x2; 125 s2 -= v[1]*x1 + v[3]*x2; 126 v += 4; 127 } 128 x[i2] = idiag[0]*s1 + idiag[2]*s2; 129 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 130 idiag += 4; 131 i2 += 2; 132 } 133 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 134 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 135 } 136 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 137 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 138 i2 = 0; 139 mdiag = a->idiag+4*a->mbs; 140 for (i=0; i<m; i++) { 141 x1 = x[i2]; x2 = x[i2+1]; 142 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 143 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 144 mdiag += 4; 145 i2 += 2; 146 } 147 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 148 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 149 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 150 } 151 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 152 idiag = a->idiag+4*a->mbs - 4; 153 i2 = 2*m - 2; 154 x1 = x[i2]; x2 = x[i2+1]; 155 x[i2] = idiag[0]*x1 + idiag[2]*x2; 156 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 157 idiag -= 4; 158 i2 -= 2; 159 for (i=m-2; i>=0; i--) { 160 v = aa + 4*(diag[i]+1); 161 vi = aj + diag[i] + 1; 162 nz = ai[i+1] - diag[i] - 1; 163 s1 = x[i2]; s2 = x[i2+1]; 164 while (nz--) { 165 idx = 2*(*vi++); 166 x1 = x[idx]; x2 = x[1+idx]; 167 s1 -= v[0]*x1 + v[2]*x2; 168 s2 -= v[1]*x1 + v[3]*x2; 169 v += 4; 170 } 171 x[i2] = idiag[0]*s1 + idiag[2]*s2; 172 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 173 idiag -= 4; 174 i2 -= 2; 175 } 176 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 177 } 178 } else { 179 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 180 } 181 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 182 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 188 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 189 { 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 191 PetscScalar *x,x1,x2,x3,s1,s2,s3; 192 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 193 PetscErrorCode ierr; 194 PetscInt m = a->mbs,i,i2,nz,idx; 195 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 196 197 PetscFunctionBegin; 198 its = its*lits; 199 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 200 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 201 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 202 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 203 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 204 205 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 206 207 diag = a->diag; 208 idiag = a->idiag; 209 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 210 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 211 212 if (flag & SOR_ZERO_INITIAL_GUESS) { 213 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 214 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 215 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 216 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 217 i2 = 3; 218 idiag += 9; 219 for (i=1; i<m; i++) { 220 v = aa + 9*ai[i]; 221 vi = aj + ai[i]; 222 nz = diag[i] - ai[i]; 223 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 224 while (nz--) { 225 idx = 3*(*vi++); 226 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 227 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 228 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 229 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 230 v += 9; 231 } 232 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 233 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 234 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 235 idiag += 9; 236 i2 += 3; 237 } 238 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 239 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 240 } 241 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 242 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 243 i2 = 0; 244 mdiag = a->idiag+9*a->mbs; 245 for (i=0; i<m; i++) { 246 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 247 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 248 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 249 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 250 mdiag += 9; 251 i2 += 3; 252 } 253 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 254 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 255 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 256 } 257 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 258 idiag = a->idiag+9*a->mbs - 9; 259 i2 = 3*m - 3; 260 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 261 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 262 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 263 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 264 idiag -= 9; 265 i2 -= 3; 266 for (i=m-2; i>=0; i--) { 267 v = aa + 9*(diag[i]+1); 268 vi = aj + diag[i] + 1; 269 nz = ai[i+1] - diag[i] - 1; 270 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 271 while (nz--) { 272 idx = 3*(*vi++); 273 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 274 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 275 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 276 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 277 v += 9; 278 } 279 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 280 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 281 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 282 idiag -= 9; 283 i2 -= 3; 284 } 285 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 286 } 287 } else { 288 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 289 } 290 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 291 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 292 PetscFunctionReturn(0); 293 } 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 297 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 298 { 299 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 300 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 301 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 302 PetscErrorCode ierr; 303 PetscInt m = a->mbs,i,i2,nz,idx; 304 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 305 306 PetscFunctionBegin; 307 its = its*lits; 308 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 309 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 310 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 311 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 312 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 313 314 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 315 316 diag = a->diag; 317 idiag = a->idiag; 318 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 319 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 320 321 if (flag & SOR_ZERO_INITIAL_GUESS) { 322 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 323 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 324 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 325 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 326 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 327 i2 = 4; 328 idiag += 16; 329 for (i=1; i<m; i++) { 330 v = aa + 16*ai[i]; 331 vi = aj + ai[i]; 332 nz = diag[i] - ai[i]; 333 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 334 while (nz--) { 335 idx = 4*(*vi++); 336 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 337 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 338 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 339 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 340 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 341 v += 16; 342 } 343 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 344 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 345 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 346 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 347 idiag += 16; 348 i2 += 4; 349 } 350 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 351 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 352 } 353 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 354 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 355 i2 = 0; 356 mdiag = a->idiag+16*a->mbs; 357 for (i=0; i<m; i++) { 358 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 359 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 360 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 361 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 362 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 363 mdiag += 16; 364 i2 += 4; 365 } 366 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 367 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 368 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 369 } 370 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 371 idiag = a->idiag+16*a->mbs - 16; 372 i2 = 4*m - 4; 373 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 374 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 375 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 376 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 377 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 378 idiag -= 16; 379 i2 -= 4; 380 for (i=m-2; i>=0; i--) { 381 v = aa + 16*(diag[i]+1); 382 vi = aj + diag[i] + 1; 383 nz = ai[i+1] - diag[i] - 1; 384 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 385 while (nz--) { 386 idx = 4*(*vi++); 387 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 388 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 389 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 390 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 391 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 392 v += 16; 393 } 394 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 395 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 396 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 397 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 398 idiag -= 16; 399 i2 -= 4; 400 } 401 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 402 } 403 } else { 404 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 405 } 406 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 407 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 413 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 414 { 415 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 416 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 417 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 418 PetscErrorCode ierr; 419 PetscInt m = a->mbs,i,i2,nz,idx; 420 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 421 422 PetscFunctionBegin; 423 its = its*lits; 424 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 425 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 426 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 427 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 428 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 429 430 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 431 432 diag = a->diag; 433 idiag = a->idiag; 434 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 435 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 436 437 if (flag & SOR_ZERO_INITIAL_GUESS) { 438 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 439 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 440 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 441 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 442 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 443 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 444 i2 = 5; 445 idiag += 25; 446 for (i=1; i<m; i++) { 447 v = aa + 25*ai[i]; 448 vi = aj + ai[i]; 449 nz = diag[i] - ai[i]; 450 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 451 while (nz--) { 452 idx = 5*(*vi++); 453 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 454 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 455 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 456 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 457 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 458 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 459 v += 25; 460 } 461 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 462 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 463 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 464 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 465 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 466 idiag += 25; 467 i2 += 5; 468 } 469 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 470 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 471 } 472 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 473 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 474 i2 = 0; 475 mdiag = a->idiag+25*a->mbs; 476 for (i=0; i<m; i++) { 477 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 478 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 479 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 480 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 481 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 482 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 483 mdiag += 25; 484 i2 += 5; 485 } 486 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 487 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 488 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 489 } 490 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 491 idiag = a->idiag+25*a->mbs - 25; 492 i2 = 5*m - 5; 493 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 494 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 495 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 496 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 497 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 498 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 499 idiag -= 25; 500 i2 -= 5; 501 for (i=m-2; i>=0; i--) { 502 v = aa + 25*(diag[i]+1); 503 vi = aj + diag[i] + 1; 504 nz = ai[i+1] - diag[i] - 1; 505 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 506 while (nz--) { 507 idx = 5*(*vi++); 508 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 509 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 510 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 511 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 512 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 513 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 514 v += 25; 515 } 516 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 517 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 518 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 519 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 520 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 521 idiag -= 25; 522 i2 -= 5; 523 } 524 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 525 } 526 } else { 527 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 528 } 529 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 530 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 /* 535 Special version for Fun3d sequential benchmark 536 */ 537 #if defined(PETSC_HAVE_FORTRAN_CAPS) 538 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 539 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 540 #define matsetvaluesblocked4_ matsetvaluesblocked4 541 #endif 542 543 EXTERN_C_BEGIN 544 #undef __FUNCT__ 545 #define __FUNCT__ "matsetvaluesblocked4_" 546 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 547 { 548 Mat A = *AA; 549 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 550 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 551 PetscInt *ai=a->i,*ailen=a->ilen; 552 PetscInt *aj=a->j,stepval; 553 const PetscScalar *value = v; 554 MatScalar *ap,*aa = a->a,*bap; 555 556 PetscFunctionBegin; 557 stepval = (n-1)*4; 558 for (k=0; k<m; k++) { /* loop over added rows */ 559 row = im[k]; 560 rp = aj + ai[row]; 561 ap = aa + 16*ai[row]; 562 nrow = ailen[row]; 563 low = 0; 564 for (l=0; l<n; l++) { /* loop over added columns */ 565 col = in[l]; 566 value = v + k*(stepval+4)*4 + l*4; 567 low = 0; high = nrow; 568 while (high-low > 7) { 569 t = (low+high)/2; 570 if (rp[t] > col) high = t; 571 else low = t; 572 } 573 for (i=low; i<high; i++) { 574 if (rp[i] > col) break; 575 if (rp[i] == col) { 576 bap = ap + 16*i; 577 for (ii=0; ii<4; ii++,value+=stepval) { 578 for (jj=ii; jj<16; jj+=4) { 579 bap[jj] += *value++; 580 } 581 } 582 goto noinsert2; 583 } 584 } 585 N = nrow++ - 1; 586 /* shift up all the later entries in this row */ 587 for (ii=N; ii>=i; ii--) { 588 rp[ii+1] = rp[ii]; 589 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 590 } 591 if (N >= i) { 592 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 593 } 594 rp[i] = col; 595 bap = ap + 16*i; 596 for (ii=0; ii<4; ii++,value+=stepval) { 597 for (jj=ii; jj<16; jj+=4) { 598 bap[jj] = *value++; 599 } 600 } 601 noinsert2:; 602 low = i; 603 } 604 ailen[row] = nrow; 605 } 606 PetscFunctionReturnVoid(); 607 } 608 EXTERN_C_END 609 610 #if defined(PETSC_HAVE_FORTRAN_CAPS) 611 #define matsetvalues4_ MATSETVALUES4 612 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 613 #define matsetvalues4_ matsetvalues4 614 #endif 615 616 EXTERN_C_BEGIN 617 #undef __FUNCT__ 618 #define __FUNCT__ "MatSetValues4_" 619 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 620 { 621 Mat A = *AA; 622 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 623 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 624 PetscInt *ai=a->i,*ailen=a->ilen; 625 PetscInt *aj=a->j,brow,bcol; 626 PetscInt ridx,cidx; 627 MatScalar *ap,value,*aa=a->a,*bap; 628 629 PetscFunctionBegin; 630 for (k=0; k<m; k++) { /* loop over added rows */ 631 row = im[k]; brow = row/4; 632 rp = aj + ai[brow]; 633 ap = aa + 16*ai[brow]; 634 nrow = ailen[brow]; 635 low = 0; 636 for (l=0; l<n; l++) { /* loop over added columns */ 637 col = in[l]; bcol = col/4; 638 ridx = row % 4; cidx = col % 4; 639 value = v[l + k*n]; 640 low = 0; high = nrow; 641 while (high-low > 7) { 642 t = (low+high)/2; 643 if (rp[t] > bcol) high = t; 644 else low = t; 645 } 646 for (i=low; i<high; i++) { 647 if (rp[i] > bcol) break; 648 if (rp[i] == bcol) { 649 bap = ap + 16*i + 4*cidx + ridx; 650 *bap += value; 651 goto noinsert1; 652 } 653 } 654 N = nrow++ - 1; 655 /* shift up all the later entries in this row */ 656 for (ii=N; ii>=i; ii--) { 657 rp[ii+1] = rp[ii]; 658 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 659 } 660 if (N>=i) { 661 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 662 } 663 rp[i] = bcol; 664 ap[16*i + 4*cidx + ridx] = value; 665 noinsert1:; 666 low = i; 667 } 668 ailen[brow] = nrow; 669 } 670 PetscFunctionReturnVoid(); 671 } 672 EXTERN_C_END 673 674 /* UGLY, ugly, ugly 675 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 676 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 677 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 678 converts the entries into single precision and then calls ..._MatScalar() to put them 679 into the single precision data structures. 680 */ 681 #if defined(PETSC_USE_MAT_SINGLE) 682 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 683 #else 684 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 685 #endif 686 687 #define CHUNKSIZE 10 688 689 /* 690 Checks for missing diagonals 691 */ 692 #undef __FUNCT__ 693 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 694 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 695 { 696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 697 PetscErrorCode ierr; 698 PetscInt *diag,*jj = a->j,i; 699 700 PetscFunctionBegin; 701 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 702 diag = a->diag; 703 for (i=0; i<a->mbs; i++) { 704 if (jj[diag[i]] != i) { 705 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 706 } 707 } 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 713 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 714 { 715 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt i,j,*diag,m = a->mbs; 718 719 PetscFunctionBegin; 720 if (a->diag) PetscFunctionReturn(0); 721 722 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 723 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 724 for (i=0; i<m; i++) { 725 diag[i] = a->i[i+1]; 726 for (j=a->i[i]; j<a->i[i+1]; j++) { 727 if (a->j[j] == i) { 728 diag[i] = j; 729 break; 730 } 731 } 732 } 733 a->diag = diag; 734 PetscFunctionReturn(0); 735 } 736 737 738 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 742 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 743 { 744 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 745 PetscErrorCode ierr; 746 PetscInt n = a->mbs,i; 747 748 PetscFunctionBegin; 749 *nn = n; 750 if (!ia) PetscFunctionReturn(0); 751 if (symmetric) { 752 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 753 } else if (oshift == 1) { 754 /* temporarily add 1 to i and j indices */ 755 PetscInt nz = a->i[n]; 756 for (i=0; i<nz; i++) a->j[i]++; 757 for (i=0; i<n+1; i++) a->i[i]++; 758 *ia = a->i; *ja = a->j; 759 } else { 760 *ia = a->i; *ja = a->j; 761 } 762 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNCT__ 767 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 768 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 769 { 770 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 771 PetscErrorCode ierr; 772 PetscInt i,n = a->mbs; 773 774 PetscFunctionBegin; 775 if (!ia) PetscFunctionReturn(0); 776 if (symmetric) { 777 ierr = PetscFree(*ia);CHKERRQ(ierr); 778 ierr = PetscFree(*ja);CHKERRQ(ierr); 779 } else if (oshift == 1) { 780 PetscInt nz = a->i[n]-1; 781 for (i=0; i<nz; i++) a->j[i]--; 782 for (i=0; i<n+1; i++) a->i[i]--; 783 } 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatDestroy_SeqBAIJ" 789 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 790 { 791 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 792 PetscErrorCode ierr; 793 794 PetscFunctionBegin; 795 #if defined(PETSC_USE_LOG) 796 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 797 #endif 798 ierr = PetscFree(a->a);CHKERRQ(ierr); 799 if (!a->singlemalloc) { 800 ierr = PetscFree(a->i);CHKERRQ(ierr); 801 ierr = PetscFree(a->j);CHKERRQ(ierr); 802 } 803 if (a->row) { 804 ierr = ISDestroy(a->row);CHKERRQ(ierr); 805 } 806 if (a->col) { 807 ierr = ISDestroy(a->col);CHKERRQ(ierr); 808 } 809 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 810 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 811 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 812 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 813 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 814 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 815 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 816 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 817 #if defined(PETSC_USE_MAT_SINGLE) 818 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 819 #endif 820 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 821 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 822 823 ierr = PetscFree(a);CHKERRQ(ierr); 824 825 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 826 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 827 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 828 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 829 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 830 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatSetOption_SeqBAIJ" 836 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 837 { 838 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 switch (op) { 843 case MAT_ROW_ORIENTED: 844 a->roworiented = PETSC_TRUE; 845 break; 846 case MAT_COLUMN_ORIENTED: 847 a->roworiented = PETSC_FALSE; 848 break; 849 case MAT_COLUMNS_SORTED: 850 a->sorted = PETSC_TRUE; 851 break; 852 case MAT_COLUMNS_UNSORTED: 853 a->sorted = PETSC_FALSE; 854 break; 855 case MAT_KEEP_ZEROED_ROWS: 856 a->keepzeroedrows = PETSC_TRUE; 857 break; 858 case MAT_NO_NEW_NONZERO_LOCATIONS: 859 a->nonew = 1; 860 break; 861 case MAT_NEW_NONZERO_LOCATION_ERR: 862 a->nonew = -1; 863 break; 864 case MAT_NEW_NONZERO_ALLOCATION_ERR: 865 a->nonew = -2; 866 break; 867 case MAT_YES_NEW_NONZERO_LOCATIONS: 868 a->nonew = 0; 869 break; 870 case MAT_ROWS_SORTED: 871 case MAT_ROWS_UNSORTED: 872 case MAT_YES_NEW_DIAGONALS: 873 case MAT_IGNORE_OFF_PROC_ENTRIES: 874 case MAT_USE_HASH_TABLE: 875 ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 876 break; 877 case MAT_NO_NEW_DIAGONALS: 878 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 879 case MAT_SYMMETRIC: 880 case MAT_STRUCTURALLY_SYMMETRIC: 881 case MAT_NOT_SYMMETRIC: 882 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 883 case MAT_HERMITIAN: 884 case MAT_NOT_HERMITIAN: 885 case MAT_SYMMETRY_ETERNAL: 886 case MAT_NOT_SYMMETRY_ETERNAL: 887 break; 888 default: 889 SETERRQ(PETSC_ERR_SUP,"unknown option"); 890 } 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatGetRow_SeqBAIJ" 896 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 897 { 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 899 PetscErrorCode ierr; 900 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 901 MatScalar *aa,*aa_i; 902 PetscScalar *v_i; 903 904 PetscFunctionBegin; 905 bs = A->bs; 906 ai = a->i; 907 aj = a->j; 908 aa = a->a; 909 bs2 = a->bs2; 910 911 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 912 913 bn = row/bs; /* Block number */ 914 bp = row % bs; /* Block Position */ 915 M = ai[bn+1] - ai[bn]; 916 *nz = bs*M; 917 918 if (v) { 919 *v = 0; 920 if (*nz) { 921 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 922 for (i=0; i<M; i++) { /* for each block in the block row */ 923 v_i = *v + i*bs; 924 aa_i = aa + bs2*(ai[bn] + i); 925 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 926 } 927 } 928 } 929 930 if (idx) { 931 *idx = 0; 932 if (*nz) { 933 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 934 for (i=0; i<M; i++) { /* for each block in the block row */ 935 idx_i = *idx + i*bs; 936 itmp = bs*aj[ai[bn] + i]; 937 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 938 } 939 } 940 } 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 946 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 947 { 948 PetscErrorCode ierr; 949 950 PetscFunctionBegin; 951 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 952 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 953 PetscFunctionReturn(0); 954 } 955 956 #undef __FUNCT__ 957 #define __FUNCT__ "MatTranspose_SeqBAIJ" 958 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 959 { 960 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 961 Mat C; 962 PetscErrorCode ierr; 963 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 964 PetscInt *rows,*cols,bs2=a->bs2; 965 PetscScalar *array; 966 967 PetscFunctionBegin; 968 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 969 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 970 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 971 972 #if defined(PETSC_USE_MAT_SINGLE) 973 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 974 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 975 #else 976 array = a->a; 977 #endif 978 979 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 980 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 981 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 982 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 983 ierr = PetscFree(col);CHKERRQ(ierr); 984 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 985 cols = rows + bs; 986 for (i=0; i<mbs; i++) { 987 cols[0] = i*bs; 988 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 989 len = ai[i+1] - ai[i]; 990 for (j=0; j<len; j++) { 991 rows[0] = (*aj++)*bs; 992 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 993 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 994 array += bs2; 995 } 996 } 997 ierr = PetscFree(rows);CHKERRQ(ierr); 998 #if defined(PETSC_USE_MAT_SINGLE) 999 ierr = PetscFree(array); 1000 #endif 1001 1002 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 1005 if (B) { 1006 *B = C; 1007 } else { 1008 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1015 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1016 { 1017 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1018 PetscErrorCode ierr; 1019 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1020 int fd; 1021 PetscScalar *aa; 1022 FILE *file; 1023 1024 PetscFunctionBegin; 1025 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1026 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1027 col_lens[0] = MAT_FILE_COOKIE; 1028 1029 col_lens[1] = A->m; 1030 col_lens[2] = A->n; 1031 col_lens[3] = a->nz*bs2; 1032 1033 /* store lengths of each row and write (including header) to file */ 1034 count = 0; 1035 for (i=0; i<a->mbs; i++) { 1036 for (j=0; j<bs; j++) { 1037 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1038 } 1039 } 1040 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1041 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1042 1043 /* store column indices (zero start index) */ 1044 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1045 count = 0; 1046 for (i=0; i<a->mbs; i++) { 1047 for (j=0; j<bs; j++) { 1048 for (k=a->i[i]; k<a->i[i+1]; k++) { 1049 for (l=0; l<bs; l++) { 1050 jj[count++] = bs*a->j[k] + l; 1051 } 1052 } 1053 } 1054 } 1055 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1056 ierr = PetscFree(jj);CHKERRQ(ierr); 1057 1058 /* store nonzero values */ 1059 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1060 count = 0; 1061 for (i=0; i<a->mbs; i++) { 1062 for (j=0; j<bs; j++) { 1063 for (k=a->i[i]; k<a->i[i+1]; k++) { 1064 for (l=0; l<bs; l++) { 1065 aa[count++] = a->a[bs2*k + l*bs + j]; 1066 } 1067 } 1068 } 1069 } 1070 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1071 ierr = PetscFree(aa);CHKERRQ(ierr); 1072 1073 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1074 if (file) { 1075 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1076 } 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1082 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1083 { 1084 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1085 PetscErrorCode ierr; 1086 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1087 PetscViewerFormat format; 1088 1089 PetscFunctionBegin; 1090 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1091 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1092 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1093 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1094 Mat aij; 1095 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1096 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1097 ierr = MatDestroy(aij);CHKERRQ(ierr); 1098 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1099 PetscFunctionReturn(0); 1100 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1101 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1102 for (i=0; i<a->mbs; i++) { 1103 for (j=0; j<bs; j++) { 1104 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1105 for (k=a->i[i]; k<a->i[i+1]; k++) { 1106 for (l=0; l<bs; l++) { 1107 #if defined(PETSC_USE_COMPLEX) 1108 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1109 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1110 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1111 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1112 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1113 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1114 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1115 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1116 } 1117 #else 1118 if (a->a[bs2*k + l*bs + j] != 0.0) { 1119 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1120 } 1121 #endif 1122 } 1123 } 1124 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1125 } 1126 } 1127 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1128 } else { 1129 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1130 for (i=0; i<a->mbs; i++) { 1131 for (j=0; j<bs; j++) { 1132 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1133 for (k=a->i[i]; k<a->i[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 #if defined(PETSC_USE_COMPLEX) 1136 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1137 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1138 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1139 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1140 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1141 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1142 } else { 1143 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1144 } 1145 #else 1146 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1147 #endif 1148 } 1149 } 1150 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1151 } 1152 } 1153 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1154 } 1155 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1161 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1162 { 1163 Mat A = (Mat) Aa; 1164 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1165 PetscErrorCode ierr; 1166 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1167 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1168 MatScalar *aa; 1169 PetscViewer viewer; 1170 1171 PetscFunctionBegin; 1172 1173 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1174 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1175 1176 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1177 1178 /* loop over matrix elements drawing boxes */ 1179 color = PETSC_DRAW_BLUE; 1180 for (i=0,row=0; i<mbs; i++,row+=bs) { 1181 for (j=a->i[i]; j<a->i[i+1]; j++) { 1182 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1183 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1184 aa = a->a + j*bs2; 1185 for (k=0; k<bs; k++) { 1186 for (l=0; l<bs; l++) { 1187 if (PetscRealPart(*aa++) >= 0.) continue; 1188 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1189 } 1190 } 1191 } 1192 } 1193 color = PETSC_DRAW_CYAN; 1194 for (i=0,row=0; i<mbs; i++,row+=bs) { 1195 for (j=a->i[i]; j<a->i[i+1]; j++) { 1196 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1197 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1198 aa = a->a + j*bs2; 1199 for (k=0; k<bs; k++) { 1200 for (l=0; l<bs; l++) { 1201 if (PetscRealPart(*aa++) != 0.) continue; 1202 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1203 } 1204 } 1205 } 1206 } 1207 1208 color = PETSC_DRAW_RED; 1209 for (i=0,row=0; i<mbs; i++,row+=bs) { 1210 for (j=a->i[i]; j<a->i[i+1]; j++) { 1211 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1212 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1213 aa = a->a + j*bs2; 1214 for (k=0; k<bs; k++) { 1215 for (l=0; l<bs; l++) { 1216 if (PetscRealPart(*aa++) <= 0.) continue; 1217 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1218 } 1219 } 1220 } 1221 } 1222 PetscFunctionReturn(0); 1223 } 1224 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1227 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1228 { 1229 PetscErrorCode ierr; 1230 PetscReal xl,yl,xr,yr,w,h; 1231 PetscDraw draw; 1232 PetscTruth isnull; 1233 1234 PetscFunctionBegin; 1235 1236 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1237 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1238 1239 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1240 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1241 xr += w; yr += h; xl = -w; yl = -h; 1242 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1243 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1244 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatView_SeqBAIJ" 1250 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1251 { 1252 PetscErrorCode ierr; 1253 PetscTruth iascii,isbinary,isdraw; 1254 1255 PetscFunctionBegin; 1256 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1257 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1258 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1259 if (iascii){ 1260 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1261 } else if (isbinary) { 1262 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1263 } else if (isdraw) { 1264 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1265 } else { 1266 Mat B; 1267 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1268 ierr = MatView(B,viewer);CHKERRQ(ierr); 1269 ierr = MatDestroy(B);CHKERRQ(ierr); 1270 } 1271 PetscFunctionReturn(0); 1272 } 1273 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1277 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1278 { 1279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1280 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1281 PetscInt *ai = a->i,*ailen = a->ilen; 1282 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1283 MatScalar *ap,*aa = a->a,zero = 0.0; 1284 1285 PetscFunctionBegin; 1286 for (k=0; k<m; k++) { /* loop over rows */ 1287 row = im[k]; brow = row/bs; 1288 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1289 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1290 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1291 nrow = ailen[brow]; 1292 for (l=0; l<n; l++) { /* loop over columns */ 1293 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1294 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1295 col = in[l] ; 1296 bcol = col/bs; 1297 cidx = col%bs; 1298 ridx = row%bs; 1299 high = nrow; 1300 low = 0; /* assume unsorted */ 1301 while (high-low > 5) { 1302 t = (low+high)/2; 1303 if (rp[t] > bcol) high = t; 1304 else low = t; 1305 } 1306 for (i=low; i<high; i++) { 1307 if (rp[i] > bcol) break; 1308 if (rp[i] == bcol) { 1309 *v++ = ap[bs2*i+bs*cidx+ridx]; 1310 goto finished; 1311 } 1312 } 1313 *v++ = zero; 1314 finished:; 1315 } 1316 } 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #if defined(PETSC_USE_MAT_SINGLE) 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1323 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1324 { 1325 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1326 PetscErrorCode ierr; 1327 PetscInt i,N = m*n*b->bs2; 1328 MatScalar *vsingle; 1329 1330 PetscFunctionBegin; 1331 if (N > b->setvalueslen) { 1332 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1333 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1334 b->setvalueslen = N; 1335 } 1336 vsingle = b->setvaluescopy; 1337 for (i=0; i<N; i++) { 1338 vsingle[i] = v[i]; 1339 } 1340 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 #endif 1344 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1348 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1349 { 1350 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1351 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1352 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1353 PetscErrorCode ierr; 1354 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1355 PetscTruth roworiented=a->roworiented; 1356 const MatScalar *value = v; 1357 MatScalar *ap,*aa = a->a,*bap; 1358 1359 PetscFunctionBegin; 1360 if (roworiented) { 1361 stepval = (n-1)*bs; 1362 } else { 1363 stepval = (m-1)*bs; 1364 } 1365 for (k=0; k<m; k++) { /* loop over added rows */ 1366 row = im[k]; 1367 if (row < 0) continue; 1368 #if defined(PETSC_USE_DEBUG) 1369 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1370 #endif 1371 rp = aj + ai[row]; 1372 ap = aa + bs2*ai[row]; 1373 rmax = imax[row]; 1374 nrow = ailen[row]; 1375 low = 0; 1376 high = nrow; 1377 for (l=0; l<n; l++) { /* loop over added columns */ 1378 if (in[l] < 0) continue; 1379 #if defined(PETSC_USE_DEBUG) 1380 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1381 #endif 1382 col = in[l]; 1383 if (roworiented) { 1384 value = v + k*(stepval+bs)*bs + l*bs; 1385 } else { 1386 value = v + l*(stepval+bs)*bs + k*bs; 1387 } 1388 if (col < lastcol) low = 0; else high = nrow; 1389 lastcol = col; 1390 while (high-low > 7) { 1391 t = (low+high)/2; 1392 if (rp[t] > col) high = t; 1393 else low = t; 1394 } 1395 for (i=low; i<high; i++) { 1396 if (rp[i] > col) break; 1397 if (rp[i] == col) { 1398 bap = ap + bs2*i; 1399 if (roworiented) { 1400 if (is == ADD_VALUES) { 1401 for (ii=0; ii<bs; ii++,value+=stepval) { 1402 for (jj=ii; jj<bs2; jj+=bs) { 1403 bap[jj] += *value++; 1404 } 1405 } 1406 } else { 1407 for (ii=0; ii<bs; ii++,value+=stepval) { 1408 for (jj=ii; jj<bs2; jj+=bs) { 1409 bap[jj] = *value++; 1410 } 1411 } 1412 } 1413 } else { 1414 if (is == ADD_VALUES) { 1415 for (ii=0; ii<bs; ii++,value+=stepval) { 1416 for (jj=0; jj<bs; jj++) { 1417 *bap++ += *value++; 1418 } 1419 } 1420 } else { 1421 for (ii=0; ii<bs; ii++,value+=stepval) { 1422 for (jj=0; jj<bs; jj++) { 1423 *bap++ = *value++; 1424 } 1425 } 1426 } 1427 } 1428 goto noinsert2; 1429 } 1430 } 1431 if (nonew == 1) goto noinsert2; 1432 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1433 if (nrow >= rmax) { 1434 /* there is no extra room in row, therefore enlarge */ 1435 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1436 MatScalar *new_a; 1437 1438 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1439 1440 /* malloc new storage space */ 1441 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1442 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1443 new_j = (PetscInt*)(new_a + bs2*new_nz); 1444 new_i = new_j + new_nz; 1445 1446 /* copy over old data into new slots */ 1447 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1448 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1449 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1450 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1451 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1452 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1453 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1454 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1455 /* free up old matrix storage */ 1456 ierr = PetscFree(a->a);CHKERRQ(ierr); 1457 if (!a->singlemalloc) { 1458 ierr = PetscFree(a->i);CHKERRQ(ierr); 1459 ierr = PetscFree(a->j);CHKERRQ(ierr); 1460 } 1461 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1462 a->singlemalloc = PETSC_TRUE; 1463 1464 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1465 rmax = imax[row] = imax[row] + CHUNKSIZE; 1466 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1467 a->maxnz += bs2*CHUNKSIZE; 1468 a->reallocs++; 1469 a->nz++; 1470 } 1471 N = nrow++ - 1; 1472 /* shift up all the later entries in this row */ 1473 for (ii=N; ii>=i; ii--) { 1474 rp[ii+1] = rp[ii]; 1475 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1476 } 1477 if (N >= i) { 1478 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1479 } 1480 rp[i] = col; 1481 bap = ap + bs2*i; 1482 if (roworiented) { 1483 for (ii=0; ii<bs; ii++,value+=stepval) { 1484 for (jj=ii; jj<bs2; jj+=bs) { 1485 bap[jj] = *value++; 1486 } 1487 } 1488 } else { 1489 for (ii=0; ii<bs; ii++,value+=stepval) { 1490 for (jj=0; jj<bs; jj++) { 1491 *bap++ = *value++; 1492 } 1493 } 1494 } 1495 noinsert2:; 1496 low = i; 1497 } 1498 ailen[row] = nrow; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1505 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1506 { 1507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1508 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1509 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1510 PetscErrorCode ierr; 1511 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1512 MatScalar *aa = a->a,*ap; 1513 PetscReal ratio=0.6; 1514 1515 PetscFunctionBegin; 1516 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1517 1518 if (m) rmax = ailen[0]; 1519 for (i=1; i<mbs; i++) { 1520 /* move each row back by the amount of empty slots (fshift) before it*/ 1521 fshift += imax[i-1] - ailen[i-1]; 1522 rmax = PetscMax(rmax,ailen[i]); 1523 if (fshift) { 1524 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1525 N = ailen[i]; 1526 for (j=0; j<N; j++) { 1527 ip[j-fshift] = ip[j]; 1528 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1529 } 1530 } 1531 ai[i] = ai[i-1] + ailen[i-1]; 1532 } 1533 if (mbs) { 1534 fshift += imax[mbs-1] - ailen[mbs-1]; 1535 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1536 } 1537 /* reset ilen and imax for each row */ 1538 for (i=0; i<mbs; i++) { 1539 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1540 } 1541 a->nz = ai[mbs]; 1542 1543 /* diagonals may have moved, so kill the diagonal pointers */ 1544 a->idiagvalid = PETSC_FALSE; 1545 if (fshift && a->diag) { 1546 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1547 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1548 a->diag = 0; 1549 } 1550 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1551 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1552 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1553 a->reallocs = 0; 1554 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1555 1556 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1557 if (a->compressedrow.use){ 1558 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1559 } 1560 1561 A->same_nonzero = PETSC_TRUE; 1562 PetscFunctionReturn(0); 1563 } 1564 1565 /* 1566 This function returns an array of flags which indicate the locations of contiguous 1567 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1568 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1569 Assume: sizes should be long enough to hold all the values. 1570 */ 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1573 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1574 { 1575 PetscInt i,j,k,row; 1576 PetscTruth flg; 1577 1578 PetscFunctionBegin; 1579 for (i=0,j=0; i<n; j++) { 1580 row = idx[i]; 1581 if (row%bs!=0) { /* Not the begining of a block */ 1582 sizes[j] = 1; 1583 i++; 1584 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1585 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1586 i++; 1587 } else { /* Begining of the block, so check if the complete block exists */ 1588 flg = PETSC_TRUE; 1589 for (k=1; k<bs; k++) { 1590 if (row+k != idx[i+k]) { /* break in the block */ 1591 flg = PETSC_FALSE; 1592 break; 1593 } 1594 } 1595 if (flg) { /* No break in the bs */ 1596 sizes[j] = bs; 1597 i+= bs; 1598 } else { 1599 sizes[j] = 1; 1600 i++; 1601 } 1602 } 1603 } 1604 *bs_max = j; 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1610 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1611 { 1612 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1613 PetscErrorCode ierr; 1614 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1615 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1616 PetscScalar zero = 0.0; 1617 MatScalar *aa; 1618 1619 PetscFunctionBegin; 1620 /* Make a copy of the IS and sort it */ 1621 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1622 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1623 1624 /* allocate memory for rows,sizes */ 1625 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1626 sizes = rows + is_n; 1627 1628 /* copy IS values to rows, and sort them */ 1629 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1630 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1631 if (baij->keepzeroedrows) { 1632 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1633 bs_max = is_n; 1634 A->same_nonzero = PETSC_TRUE; 1635 } else { 1636 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1637 A->same_nonzero = PETSC_FALSE; 1638 } 1639 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1640 1641 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1642 row = rows[j]; 1643 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1644 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1645 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1646 if (sizes[i] == bs && !baij->keepzeroedrows) { 1647 if (diag) { 1648 if (baij->ilen[row/bs] > 0) { 1649 baij->ilen[row/bs] = 1; 1650 baij->j[baij->i[row/bs]] = row/bs; 1651 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1652 } 1653 /* Now insert all the diagonal values for this bs */ 1654 for (k=0; k<bs; k++) { 1655 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1656 } 1657 } else { /* (!diag) */ 1658 baij->ilen[row/bs] = 0; 1659 } /* end (!diag) */ 1660 } else { /* (sizes[i] != bs) */ 1661 #if defined (PETSC_USE_DEBUG) 1662 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1663 #endif 1664 for (k=0; k<count; k++) { 1665 aa[0] = zero; 1666 aa += bs; 1667 } 1668 if (diag) { 1669 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1670 } 1671 } 1672 } 1673 1674 ierr = PetscFree(rows);CHKERRQ(ierr); 1675 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1681 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1682 { 1683 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1684 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1685 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1686 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1687 PetscErrorCode ierr; 1688 PetscInt ridx,cidx,bs2=a->bs2; 1689 PetscTruth roworiented=a->roworiented; 1690 MatScalar *ap,value,*aa=a->a,*bap; 1691 1692 PetscFunctionBegin; 1693 for (k=0; k<m; k++) { /* loop over added rows */ 1694 row = im[k]; brow = row/bs; 1695 if (row < 0) continue; 1696 #if defined(PETSC_USE_DEBUG) 1697 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1698 #endif 1699 rp = aj + ai[brow]; 1700 ap = aa + bs2*ai[brow]; 1701 rmax = imax[brow]; 1702 nrow = ailen[brow]; 1703 low = 0; 1704 high = nrow; 1705 for (l=0; l<n; l++) { /* loop over added columns */ 1706 if (in[l] < 0) continue; 1707 #if defined(PETSC_USE_DEBUG) 1708 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1709 #endif 1710 col = in[l]; bcol = col/bs; 1711 ridx = row % bs; cidx = col % bs; 1712 if (roworiented) { 1713 value = v[l + k*n]; 1714 } else { 1715 value = v[k + l*m]; 1716 } 1717 if (col < lastcol) low = 0; else high = nrow; 1718 lastcol = col; 1719 while (high-low > 7) { 1720 t = (low+high)/2; 1721 if (rp[t] > bcol) high = t; 1722 else low = t; 1723 } 1724 for (i=low; i<high; i++) { 1725 if (rp[i] > bcol) break; 1726 if (rp[i] == bcol) { 1727 bap = ap + bs2*i + bs*cidx + ridx; 1728 if (is == ADD_VALUES) *bap += value; 1729 else *bap = value; 1730 goto noinsert1; 1731 } 1732 } 1733 if (nonew == 1) goto noinsert1; 1734 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1735 if (nrow >= rmax) { 1736 /* there is no extra room in row, therefore enlarge */ 1737 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1738 MatScalar *new_a; 1739 1740 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1741 1742 /* Malloc new storage space */ 1743 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1744 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1745 new_j = (PetscInt*)(new_a + bs2*new_nz); 1746 new_i = new_j + new_nz; 1747 1748 /* copy over old data into new slots */ 1749 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1750 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1751 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1752 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1753 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1755 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1756 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1757 /* free up old matrix storage */ 1758 ierr = PetscFree(a->a);CHKERRQ(ierr); 1759 if (!a->singlemalloc) { 1760 ierr = PetscFree(a->i);CHKERRQ(ierr); 1761 ierr = PetscFree(a->j);CHKERRQ(ierr); 1762 } 1763 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1764 a->singlemalloc = PETSC_TRUE; 1765 1766 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1767 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1768 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 1769 a->maxnz += bs2*CHUNKSIZE; 1770 a->reallocs++; 1771 a->nz++; 1772 } 1773 N = nrow++ - 1; 1774 /* shift up all the later entries in this row */ 1775 for (ii=N; ii>=i; ii--) { 1776 rp[ii+1] = rp[ii]; 1777 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1778 } 1779 if (N>=i) { 1780 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1781 } 1782 rp[i] = bcol; 1783 ap[bs2*i + bs*cidx + ridx] = value; 1784 noinsert1:; 1785 low = i; 1786 } 1787 ailen[brow] = nrow; 1788 } 1789 A->same_nonzero = PETSC_FALSE; 1790 PetscFunctionReturn(0); 1791 } 1792 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1796 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1797 { 1798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1799 Mat outA; 1800 PetscErrorCode ierr; 1801 PetscTruth row_identity,col_identity; 1802 1803 PetscFunctionBegin; 1804 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1805 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1806 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1807 if (!row_identity || !col_identity) { 1808 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1809 } 1810 1811 outA = inA; 1812 inA->factor = FACTOR_LU; 1813 1814 if (!a->diag) { 1815 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1816 } 1817 1818 a->row = row; 1819 a->col = col; 1820 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1821 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1822 1823 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1824 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1825 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1826 1827 /* 1828 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1829 for ILU(0) factorization with natural ordering 1830 */ 1831 if (inA->bs < 8) { 1832 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1833 } else { 1834 if (!a->solve_work) { 1835 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1836 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1837 } 1838 } 1839 1840 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1841 1842 PetscFunctionReturn(0); 1843 } 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1846 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1847 { 1848 static PetscTruth called = PETSC_FALSE; 1849 MPI_Comm comm = A->comm; 1850 PetscErrorCode ierr; 1851 1852 PetscFunctionBegin; 1853 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1854 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1855 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 EXTERN_C_BEGIN 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1862 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1863 { 1864 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1865 PetscInt i,nz,nbs; 1866 1867 PetscFunctionBegin; 1868 nz = baij->maxnz/baij->bs2; 1869 nbs = baij->nbs; 1870 for (i=0; i<nz; i++) { 1871 baij->j[i] = indices[i]; 1872 } 1873 baij->nz = nz; 1874 for (i=0; i<nbs; i++) { 1875 baij->ilen[i] = baij->imax[i]; 1876 } 1877 1878 PetscFunctionReturn(0); 1879 } 1880 EXTERN_C_END 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1884 /*@ 1885 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1886 in the matrix. 1887 1888 Input Parameters: 1889 + mat - the SeqBAIJ matrix 1890 - indices - the column indices 1891 1892 Level: advanced 1893 1894 Notes: 1895 This can be called if you have precomputed the nonzero structure of the 1896 matrix and want to provide it to the matrix object to improve the performance 1897 of the MatSetValues() operation. 1898 1899 You MUST have set the correct numbers of nonzeros per row in the call to 1900 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1901 1902 MUST be called before any calls to MatSetValues(); 1903 1904 @*/ 1905 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1906 { 1907 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1908 1909 PetscFunctionBegin; 1910 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1911 PetscValidPointer(indices,2); 1912 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1913 if (f) { 1914 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1915 } else { 1916 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1923 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1924 { 1925 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1926 PetscErrorCode ierr; 1927 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1928 PetscReal atmp; 1929 PetscScalar *x,zero = 0.0; 1930 MatScalar *aa; 1931 PetscInt ncols,brow,krow,kcol; 1932 1933 PetscFunctionBegin; 1934 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1935 bs = A->bs; 1936 aa = a->a; 1937 ai = a->i; 1938 aj = a->j; 1939 mbs = a->mbs; 1940 1941 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1942 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1943 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1944 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1945 for (i=0; i<mbs; i++) { 1946 ncols = ai[1] - ai[0]; ai++; 1947 brow = bs*i; 1948 for (j=0; j<ncols; j++){ 1949 /* bcol = bs*(*aj); */ 1950 for (kcol=0; kcol<bs; kcol++){ 1951 for (krow=0; krow<bs; krow++){ 1952 atmp = PetscAbsScalar(*aa); aa++; 1953 row = brow + krow; /* row index */ 1954 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1955 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1956 } 1957 } 1958 aj++; 1959 } 1960 } 1961 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1967 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1968 { 1969 PetscErrorCode ierr; 1970 1971 PetscFunctionBegin; 1972 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1978 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1979 { 1980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1981 PetscFunctionBegin; 1982 *array = a->a; 1983 PetscFunctionReturn(0); 1984 } 1985 1986 #undef __FUNCT__ 1987 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1988 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1989 { 1990 PetscFunctionBegin; 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #include "petscblaslapack.h" 1995 #undef __FUNCT__ 1996 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1997 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1998 { 1999 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2000 PetscErrorCode ierr; 2001 PetscInt i,bs=Y->bs,j,bs2; 2002 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 2003 2004 PetscFunctionBegin; 2005 if (str == SAME_NONZERO_PATTERN) { 2006 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 2007 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2008 if (y->xtoy && y->XtoY != X) { 2009 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2010 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2011 } 2012 if (!y->xtoy) { /* get xtoy */ 2013 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2014 y->XtoY = X; 2015 } 2016 bs2 = bs*bs; 2017 for (i=0; i<x->nz; i++) { 2018 j = 0; 2019 while (j < bs2){ 2020 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2021 j++; 2022 } 2023 } 2024 ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2025 } else { 2026 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 /* -------------------------------------------------------------------*/ 2032 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2033 MatGetRow_SeqBAIJ, 2034 MatRestoreRow_SeqBAIJ, 2035 MatMult_SeqBAIJ_N, 2036 /* 4*/ MatMultAdd_SeqBAIJ_N, 2037 MatMultTranspose_SeqBAIJ, 2038 MatMultTransposeAdd_SeqBAIJ, 2039 MatSolve_SeqBAIJ_N, 2040 0, 2041 0, 2042 /*10*/ 0, 2043 MatLUFactor_SeqBAIJ, 2044 0, 2045 0, 2046 MatTranspose_SeqBAIJ, 2047 /*15*/ MatGetInfo_SeqBAIJ, 2048 MatEqual_SeqBAIJ, 2049 MatGetDiagonal_SeqBAIJ, 2050 MatDiagonalScale_SeqBAIJ, 2051 MatNorm_SeqBAIJ, 2052 /*20*/ 0, 2053 MatAssemblyEnd_SeqBAIJ, 2054 0, 2055 MatSetOption_SeqBAIJ, 2056 MatZeroEntries_SeqBAIJ, 2057 /*25*/ MatZeroRows_SeqBAIJ, 2058 MatLUFactorSymbolic_SeqBAIJ, 2059 MatLUFactorNumeric_SeqBAIJ_N, 2060 MatCholeskyFactorSymbolic_SeqBAIJ, 2061 MatCholeskyFactorNumeric_SeqBAIJ_N, 2062 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2063 MatILUFactorSymbolic_SeqBAIJ, 2064 MatICCFactorSymbolic_SeqBAIJ, 2065 MatGetArray_SeqBAIJ, 2066 MatRestoreArray_SeqBAIJ, 2067 /*35*/ MatDuplicate_SeqBAIJ, 2068 0, 2069 0, 2070 MatILUFactor_SeqBAIJ, 2071 0, 2072 /*40*/ MatAXPY_SeqBAIJ, 2073 MatGetSubMatrices_SeqBAIJ, 2074 MatIncreaseOverlap_SeqBAIJ, 2075 MatGetValues_SeqBAIJ, 2076 0, 2077 /*45*/ MatPrintHelp_SeqBAIJ, 2078 MatScale_SeqBAIJ, 2079 0, 2080 0, 2081 0, 2082 /*50*/ 0, 2083 MatGetRowIJ_SeqBAIJ, 2084 MatRestoreRowIJ_SeqBAIJ, 2085 0, 2086 0, 2087 /*55*/ 0, 2088 0, 2089 0, 2090 0, 2091 MatSetValuesBlocked_SeqBAIJ, 2092 /*60*/ MatGetSubMatrix_SeqBAIJ, 2093 MatDestroy_SeqBAIJ, 2094 MatView_SeqBAIJ, 2095 MatGetPetscMaps_Petsc, 2096 0, 2097 /*65*/ 0, 2098 0, 2099 0, 2100 0, 2101 0, 2102 /*70*/ MatGetRowMax_SeqBAIJ, 2103 MatConvert_Basic, 2104 0, 2105 0, 2106 0, 2107 /*75*/ 0, 2108 0, 2109 0, 2110 0, 2111 0, 2112 /*80*/ 0, 2113 0, 2114 0, 2115 0, 2116 MatLoad_SeqBAIJ, 2117 /*85*/ 0, 2118 0, 2119 0, 2120 0, 2121 0, 2122 /*90*/ 0, 2123 0, 2124 0, 2125 0, 2126 0, 2127 /*95*/ 0, 2128 0, 2129 0, 2130 0}; 2131 2132 EXTERN_C_BEGIN 2133 #undef __FUNCT__ 2134 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2135 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2136 { 2137 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2138 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2139 PetscErrorCode ierr; 2140 2141 PetscFunctionBegin; 2142 if (aij->nonew != 1) { 2143 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2144 } 2145 2146 /* allocate space for values if not already there */ 2147 if (!aij->saved_values) { 2148 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2149 } 2150 2151 /* copy values over */ 2152 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 EXTERN_C_END 2156 2157 EXTERN_C_BEGIN 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2160 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2161 { 2162 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2163 PetscErrorCode ierr; 2164 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2165 2166 PetscFunctionBegin; 2167 if (aij->nonew != 1) { 2168 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2169 } 2170 if (!aij->saved_values) { 2171 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2172 } 2173 2174 /* copy values over */ 2175 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 EXTERN_C_END 2179 2180 EXTERN_C_BEGIN 2181 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 2182 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*); 2183 EXTERN_C_END 2184 2185 EXTERN_C_BEGIN 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2188 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2189 { 2190 Mat_SeqBAIJ *b; 2191 PetscErrorCode ierr; 2192 PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2193 PetscTruth flg; 2194 2195 PetscFunctionBegin; 2196 2197 B->preallocated = PETSC_TRUE; 2198 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2199 if (nnz && newbs != bs) { 2200 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2201 } 2202 bs = newbs; 2203 2204 mbs = B->m/bs; 2205 nbs = B->n/bs; 2206 bs2 = bs*bs; 2207 2208 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2209 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2210 } 2211 2212 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2213 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2214 if (nnz) { 2215 for (i=0; i<mbs; i++) { 2216 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2217 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2218 } 2219 } 2220 2221 b = (Mat_SeqBAIJ*)B->data; 2222 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2223 B->ops->solve = MatSolve_SeqBAIJ_Update; 2224 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2225 if (!flg) { 2226 switch (bs) { 2227 case 1: 2228 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2229 B->ops->mult = MatMult_SeqBAIJ_1; 2230 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2231 break; 2232 case 2: 2233 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2234 B->ops->mult = MatMult_SeqBAIJ_2; 2235 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2236 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2237 break; 2238 case 3: 2239 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2240 B->ops->mult = MatMult_SeqBAIJ_3; 2241 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2242 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2243 break; 2244 case 4: 2245 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2246 B->ops->mult = MatMult_SeqBAIJ_4; 2247 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2248 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2249 break; 2250 case 5: 2251 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2252 B->ops->mult = MatMult_SeqBAIJ_5; 2253 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2254 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2255 break; 2256 case 6: 2257 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2258 B->ops->mult = MatMult_SeqBAIJ_6; 2259 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2260 break; 2261 case 7: 2262 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2263 B->ops->mult = MatMult_SeqBAIJ_7; 2264 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2265 break; 2266 default: 2267 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2268 B->ops->mult = MatMult_SeqBAIJ_N; 2269 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2270 break; 2271 } 2272 } 2273 B->bs = bs; 2274 b->mbs = mbs; 2275 b->nbs = nbs; 2276 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2277 if (!nnz) { 2278 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2279 else if (nz <= 0) nz = 1; 2280 for (i=0; i<mbs; i++) b->imax[i] = nz; 2281 nz = nz*mbs; 2282 } else { 2283 nz = 0; 2284 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2285 } 2286 2287 /* allocate the matrix space */ 2288 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2289 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2290 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2291 b->j = (PetscInt*)(b->a + nz*bs2); 2292 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2293 b->i = b->j + nz; 2294 b->singlemalloc = PETSC_TRUE; 2295 2296 b->i[0] = 0; 2297 for (i=1; i<mbs+1; i++) { 2298 b->i[i] = b->i[i-1] + b->imax[i-1]; 2299 } 2300 2301 /* b->ilen will count nonzeros in each block row so far. */ 2302 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2303 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2304 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2305 2306 B->bs = bs; 2307 b->bs2 = bs2; 2308 b->mbs = mbs; 2309 b->nz = 0; 2310 b->maxnz = nz*bs2; 2311 B->info.nz_unneeded = (PetscReal)b->maxnz; 2312 PetscFunctionReturn(0); 2313 } 2314 EXTERN_C_END 2315 2316 /*MC 2317 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2318 block sparse compressed row format. 2319 2320 Options Database Keys: 2321 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2322 2323 Level: beginner 2324 2325 .seealso: MatCreateSeqBAIJ 2326 M*/ 2327 2328 EXTERN_C_BEGIN 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatCreate_SeqBAIJ" 2331 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2332 { 2333 PetscErrorCode ierr; 2334 PetscMPIInt size; 2335 Mat_SeqBAIJ *b; 2336 2337 PetscFunctionBegin; 2338 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2339 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2340 2341 B->m = B->M = PetscMax(B->m,B->M); 2342 B->n = B->N = PetscMax(B->n,B->N); 2343 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2344 B->data = (void*)b; 2345 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2346 B->factor = 0; 2347 B->mapping = 0; 2348 b->row = 0; 2349 b->col = 0; 2350 b->icol = 0; 2351 b->reallocs = 0; 2352 b->saved_values = 0; 2353 #if defined(PETSC_USE_MAT_SINGLE) 2354 b->setvalueslen = 0; 2355 b->setvaluescopy = PETSC_NULL; 2356 #endif 2357 2358 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2359 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2360 2361 b->sorted = PETSC_FALSE; 2362 b->roworiented = PETSC_TRUE; 2363 b->nonew = 0; 2364 b->diag = 0; 2365 b->solve_work = 0; 2366 b->mult_work = 0; 2367 B->spptr = 0; 2368 B->info.nz_unneeded = (PetscReal)b->maxnz; 2369 b->keepzeroedrows = PETSC_FALSE; 2370 b->xtoy = 0; 2371 b->XtoY = 0; 2372 b->compressedrow.use = PETSC_FALSE; 2373 b->compressedrow.nrows = 0; 2374 b->compressedrow.i = PETSC_NULL; 2375 b->compressedrow.rindex = PETSC_NULL; 2376 b->compressedrow.checked = PETSC_FALSE; 2377 B->same_nonzero = PETSC_FALSE; 2378 2379 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2380 "MatStoreValues_SeqBAIJ", 2381 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2382 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2383 "MatRetrieveValues_SeqBAIJ", 2384 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2385 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2386 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2387 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2388 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2389 "MatConvert_SeqBAIJ_SeqAIJ", 2390 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2391 #if !defined(PETSC_USE_64BIT_INT) 2392 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2393 "MatConvert_SeqBAIJ_SeqSBAIJ", 2394 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2395 #endif 2396 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2397 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2398 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2399 PetscFunctionReturn(0); 2400 } 2401 EXTERN_C_END 2402 2403 #undef __FUNCT__ 2404 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2405 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2406 { 2407 Mat C; 2408 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2409 PetscErrorCode ierr; 2410 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2411 2412 PetscFunctionBegin; 2413 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2414 2415 *B = 0; 2416 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2417 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2418 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2419 c = (Mat_SeqBAIJ*)C->data; 2420 2421 C->M = A->M; 2422 C->N = A->N; 2423 C->bs = A->bs; 2424 c->bs2 = a->bs2; 2425 c->mbs = a->mbs; 2426 c->nbs = a->nbs; 2427 2428 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2429 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2430 for (i=0; i<mbs; i++) { 2431 c->imax[i] = a->imax[i]; 2432 c->ilen[i] = a->ilen[i]; 2433 } 2434 2435 /* allocate the matrix space */ 2436 c->singlemalloc = PETSC_TRUE; 2437 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2438 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2439 c->j = (PetscInt*)(c->a + nz*bs2); 2440 c->i = c->j + nz; 2441 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2442 if (mbs > 0) { 2443 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2444 if (cpvalues == MAT_COPY_VALUES) { 2445 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2446 } else { 2447 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2448 } 2449 } 2450 2451 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2452 c->sorted = a->sorted; 2453 c->roworiented = a->roworiented; 2454 c->nonew = a->nonew; 2455 2456 if (a->diag) { 2457 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2458 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2459 for (i=0; i<mbs; i++) { 2460 c->diag[i] = a->diag[i]; 2461 } 2462 } else c->diag = 0; 2463 c->nz = a->nz; 2464 c->maxnz = a->maxnz; 2465 c->solve_work = 0; 2466 c->mult_work = 0; 2467 C->preallocated = PETSC_TRUE; 2468 C->assembled = PETSC_TRUE; 2469 2470 c->compressedrow.use = a->compressedrow.use; 2471 c->compressedrow.nrows = a->compressedrow.nrows; 2472 c->compressedrow.checked = a->compressedrow.checked; 2473 if ( a->compressedrow.checked && a->compressedrow.use){ 2474 i = a->compressedrow.nrows; 2475 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2476 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2477 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2478 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2479 } else { 2480 c->compressedrow.use = PETSC_FALSE; 2481 c->compressedrow.i = PETSC_NULL; 2482 c->compressedrow.rindex = PETSC_NULL; 2483 } 2484 C->same_nonzero = A->same_nonzero; 2485 *B = C; 2486 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2487 PetscFunctionReturn(0); 2488 } 2489 2490 #undef __FUNCT__ 2491 #define __FUNCT__ "MatLoad_SeqBAIJ" 2492 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2493 { 2494 Mat_SeqBAIJ *a; 2495 Mat B; 2496 PetscErrorCode ierr; 2497 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2498 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2499 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2500 PetscInt *masked,nmask,tmp,bs2,ishift; 2501 PetscMPIInt size; 2502 int fd; 2503 PetscScalar *aa; 2504 MPI_Comm comm = ((PetscObject)viewer)->comm; 2505 2506 PetscFunctionBegin; 2507 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2508 bs2 = bs*bs; 2509 2510 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2511 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2512 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2513 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2514 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2515 M = header[1]; N = header[2]; nz = header[3]; 2516 2517 if (header[3] < 0) { 2518 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2519 } 2520 2521 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2522 2523 /* 2524 This code adds extra rows to make sure the number of rows is 2525 divisible by the blocksize 2526 */ 2527 mbs = M/bs; 2528 extra_rows = bs - M + bs*(mbs); 2529 if (extra_rows == bs) extra_rows = 0; 2530 else mbs++; 2531 if (extra_rows) { 2532 ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2533 } 2534 2535 /* read in row lengths */ 2536 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2537 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2538 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2539 2540 /* read in column indices */ 2541 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2542 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2543 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2544 2545 /* loop over row lengths determining block row lengths */ 2546 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2547 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2548 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2549 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2550 masked = mask + mbs; 2551 rowcount = 0; nzcount = 0; 2552 for (i=0; i<mbs; i++) { 2553 nmask = 0; 2554 for (j=0; j<bs; j++) { 2555 kmax = rowlengths[rowcount]; 2556 for (k=0; k<kmax; k++) { 2557 tmp = jj[nzcount++]/bs; 2558 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2559 } 2560 rowcount++; 2561 } 2562 browlengths[i] += nmask; 2563 /* zero out the mask elements we set */ 2564 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2565 } 2566 2567 /* create our matrix */ 2568 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2569 ierr = MatSetType(B,type);CHKERRQ(ierr); 2570 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2571 a = (Mat_SeqBAIJ*)B->data; 2572 2573 /* set matrix "i" values */ 2574 a->i[0] = 0; 2575 for (i=1; i<= mbs; i++) { 2576 a->i[i] = a->i[i-1] + browlengths[i-1]; 2577 a->ilen[i-1] = browlengths[i-1]; 2578 } 2579 a->nz = 0; 2580 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2581 2582 /* read in nonzero values */ 2583 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2584 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2585 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2586 2587 /* set "a" and "j" values into matrix */ 2588 nzcount = 0; jcount = 0; 2589 for (i=0; i<mbs; i++) { 2590 nzcountb = nzcount; 2591 nmask = 0; 2592 for (j=0; j<bs; j++) { 2593 kmax = rowlengths[i*bs+j]; 2594 for (k=0; k<kmax; k++) { 2595 tmp = jj[nzcount++]/bs; 2596 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2597 } 2598 } 2599 /* sort the masked values */ 2600 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2601 2602 /* set "j" values into matrix */ 2603 maskcount = 1; 2604 for (j=0; j<nmask; j++) { 2605 a->j[jcount++] = masked[j]; 2606 mask[masked[j]] = maskcount++; 2607 } 2608 /* set "a" values into matrix */ 2609 ishift = bs2*a->i[i]; 2610 for (j=0; j<bs; j++) { 2611 kmax = rowlengths[i*bs+j]; 2612 for (k=0; k<kmax; k++) { 2613 tmp = jj[nzcountb]/bs ; 2614 block = mask[tmp] - 1; 2615 point = jj[nzcountb] - bs*tmp; 2616 idx = ishift + bs2*block + j + bs*point; 2617 a->a[idx] = (MatScalar)aa[nzcountb++]; 2618 } 2619 } 2620 /* zero out the mask elements we set */ 2621 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2622 } 2623 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2624 2625 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2626 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2627 ierr = PetscFree(aa);CHKERRQ(ierr); 2628 ierr = PetscFree(jj);CHKERRQ(ierr); 2629 ierr = PetscFree(mask);CHKERRQ(ierr); 2630 2631 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2632 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2633 ierr = MatView_Private(B);CHKERRQ(ierr); 2634 2635 *A = B; 2636 PetscFunctionReturn(0); 2637 } 2638 2639 #undef __FUNCT__ 2640 #define __FUNCT__ "MatCreateSeqBAIJ" 2641 /*@C 2642 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2643 compressed row) format. For good matrix assembly performance the 2644 user should preallocate the matrix storage by setting the parameter nz 2645 (or the array nnz). By setting these parameters accurately, performance 2646 during matrix assembly can be increased by more than a factor of 50. 2647 2648 Collective on MPI_Comm 2649 2650 Input Parameters: 2651 + comm - MPI communicator, set to PETSC_COMM_SELF 2652 . bs - size of block 2653 . m - number of rows 2654 . n - number of columns 2655 . nz - number of nonzero blocks per block row (same for all rows) 2656 - nnz - array containing the number of nonzero blocks in the various block rows 2657 (possibly different for each block row) or PETSC_NULL 2658 2659 Output Parameter: 2660 . A - the matrix 2661 2662 Options Database Keys: 2663 . -mat_no_unroll - uses code that does not unroll the loops in the 2664 block calculations (much slower) 2665 . -mat_block_size - size of the blocks to use 2666 2667 Level: intermediate 2668 2669 Notes: 2670 The number of rows and columns must be divisible by blocksize. 2671 2672 If the nnz parameter is given then the nz parameter is ignored 2673 2674 A nonzero block is any block that as 1 or more nonzeros in it 2675 2676 The block AIJ format is fully compatible with standard Fortran 77 2677 storage. That is, the stored row and column indices can begin at 2678 either one (as in Fortran) or zero. See the users' manual for details. 2679 2680 Specify the preallocated storage with either nz or nnz (not both). 2681 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2682 allocation. For additional details, see the users manual chapter on 2683 matrices. 2684 2685 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2686 @*/ 2687 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2688 { 2689 PetscErrorCode ierr; 2690 2691 PetscFunctionBegin; 2692 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2693 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2694 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2695 PetscFunctionReturn(0); 2696 } 2697 2698 #undef __FUNCT__ 2699 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2700 /*@C 2701 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2702 per row in the matrix. For good matrix assembly performance the 2703 user should preallocate the matrix storage by setting the parameter nz 2704 (or the array nnz). By setting these parameters accurately, performance 2705 during matrix assembly can be increased by more than a factor of 50. 2706 2707 Collective on MPI_Comm 2708 2709 Input Parameters: 2710 + A - the matrix 2711 . bs - size of block 2712 . nz - number of block nonzeros per block row (same for all rows) 2713 - nnz - array containing the number of block nonzeros in the various block rows 2714 (possibly different for each block row) or PETSC_NULL 2715 2716 Options Database Keys: 2717 . -mat_no_unroll - uses code that does not unroll the loops in the 2718 block calculations (much slower) 2719 . -mat_block_size - size of the blocks to use 2720 2721 Level: intermediate 2722 2723 Notes: 2724 If the nnz parameter is given then the nz parameter is ignored 2725 2726 The block AIJ format is fully compatible with standard Fortran 77 2727 storage. That is, the stored row and column indices can begin at 2728 either one (as in Fortran) or zero. See the users' manual for details. 2729 2730 Specify the preallocated storage with either nz or nnz (not both). 2731 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2732 allocation. For additional details, see the users manual chapter on 2733 matrices. 2734 2735 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2736 @*/ 2737 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2738 { 2739 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2740 2741 PetscFunctionBegin; 2742 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2743 if (f) { 2744 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2745 } 2746 PetscFunctionReturn(0); 2747 } 2748 2749