1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 15 int MatInvertBlockDiagonal_SeqBAIJ(Mat A) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 int *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr; 19 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) PetscFunctionReturn(0); 23 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 24 diag_offset = a->diag; 25 if (!a->idiag) { 26 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 27 } 28 diag = a->idiag; 29 mdiag = a->idiag+bs*bs*mbs; 30 /* factor and invert each block */ 31 switch (a->bs){ 32 case 2: 33 for (i=0; i<mbs; i++) { 34 odiag = v + 4*diag_offset[i]; 35 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 36 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 37 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 38 diag += 4; 39 mdiag += 4; 40 } 41 break; 42 case 3: 43 for (i=0; i<mbs; i++) { 44 odiag = v + 9*diag_offset[i]; 45 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 46 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 47 diag[8] = odiag[8]; 48 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 49 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 50 mdiag[8] = odiag[8]; 51 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 52 diag += 9; 53 mdiag += 9; 54 } 55 break; 56 case 4: 57 for (i=0; i<mbs; i++) { 58 odiag = v + 16*diag_offset[i]; 59 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 61 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 62 diag += 16; 63 mdiag += 16; 64 } 65 break; 66 case 5: 67 for (i=0; i<mbs; i++) { 68 odiag = v + 25*diag_offset[i]; 69 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 71 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 72 diag += 25; 73 mdiag += 25; 74 } 75 break; 76 default: 77 SETERRQ1(1,"not supported for block size %d",a->bs); 78 } 79 a->idiagvalid = PETSC_TRUE; 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 85 int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 88 PetscScalar *x,x1,x2,s1,s2; 89 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 90 int ierr,m = a->mbs,i,i2,nz,idx; 91 const int *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 int ierr,m = a->mbs,i,i2,nz,idx; 192 const int *diag,*ai = a->i,*aj = a->j,*vi; 193 194 PetscFunctionBegin; 195 its = its*lits; 196 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 197 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 198 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 199 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 200 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 201 202 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 203 204 diag = a->diag; 205 idiag = a->idiag; 206 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 207 ierr = VecGetArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 208 209 if (flag & SOR_ZERO_INITIAL_GUESS) { 210 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 211 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 212 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 213 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 214 i2 = 3; 215 idiag += 9; 216 for (i=1; i<m; i++) { 217 v = aa + 9*ai[i]; 218 vi = aj + ai[i]; 219 nz = diag[i] - ai[i]; 220 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 221 while (nz--) { 222 idx = 3*(*vi++); 223 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 224 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 225 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 226 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 227 v += 9; 228 } 229 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 230 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 231 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 232 idiag += 9; 233 i2 += 3; 234 } 235 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 236 PetscLogFlops(9*(a->nz)); 237 } 238 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 239 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 240 i2 = 0; 241 mdiag = a->idiag+9*a->mbs; 242 for (i=0; i<m; i++) { 243 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 244 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 245 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 246 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 247 mdiag += 9; 248 i2 += 3; 249 } 250 PetscLogFlops(15*m); 251 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 252 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 255 idiag = a->idiag+9*a->mbs - 9; 256 i2 = 3*m - 3; 257 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 258 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 259 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 260 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 261 idiag -= 9; 262 i2 -= 3; 263 for (i=m-2; i>=0; i--) { 264 v = aa + 9*(diag[i]+1); 265 vi = aj + diag[i] + 1; 266 nz = ai[i+1] - diag[i] - 1; 267 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 268 while (nz--) { 269 idx = 3*(*vi++); 270 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 271 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 272 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 273 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 274 v += 9; 275 } 276 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 277 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 278 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 279 idiag -= 9; 280 i2 -= 3; 281 } 282 PetscLogFlops(9*(a->nz)); 283 } 284 } else { 285 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 286 } 287 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 288 ierr = VecRestoreArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 294 int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 295 { 296 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 297 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 298 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 299 int ierr,m = a->mbs,i,i2,nz,idx; 300 const int *diag,*ai = a->i,*aj = a->j,*vi; 301 302 PetscFunctionBegin; 303 its = its*lits; 304 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 305 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 306 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 307 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 308 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 309 310 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 311 312 diag = a->diag; 313 idiag = a->idiag; 314 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 315 ierr = VecGetArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 316 317 if (flag & SOR_ZERO_INITIAL_GUESS) { 318 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 319 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 320 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 321 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 322 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 323 i2 = 4; 324 idiag += 16; 325 for (i=1; i<m; i++) { 326 v = aa + 16*ai[i]; 327 vi = aj + ai[i]; 328 nz = diag[i] - ai[i]; 329 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 330 while (nz--) { 331 idx = 4*(*vi++); 332 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 333 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 334 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 335 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 336 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 337 v += 16; 338 } 339 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 340 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 341 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 342 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 343 idiag += 16; 344 i2 += 4; 345 } 346 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 347 PetscLogFlops(16*(a->nz)); 348 } 349 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 350 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 351 i2 = 0; 352 mdiag = a->idiag+16*a->mbs; 353 for (i=0; i<m; i++) { 354 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 355 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 356 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 357 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 358 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 359 mdiag += 16; 360 i2 += 4; 361 } 362 PetscLogFlops(28*m); 363 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 364 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 365 } 366 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 367 idiag = a->idiag+16*a->mbs - 16; 368 i2 = 4*m - 4; 369 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 370 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 371 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 372 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 373 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 374 idiag -= 16; 375 i2 -= 4; 376 for (i=m-2; i>=0; i--) { 377 v = aa + 16*(diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 381 while (nz--) { 382 idx = 4*(*vi++); 383 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 384 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 385 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 386 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 387 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 388 v += 16; 389 } 390 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 391 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 392 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 393 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 394 idiag -= 16; 395 i2 -= 4; 396 } 397 PetscLogFlops(16*(a->nz)); 398 } 399 } else { 400 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 401 } 402 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 403 ierr = VecRestoreArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 409 int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 410 { 411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 412 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 413 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 414 int ierr,m = a->mbs,i,i2,nz,idx; 415 const int *diag,*ai = a->i,*aj = a->j,*vi; 416 417 PetscFunctionBegin; 418 its = its*lits; 419 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 420 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 421 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 422 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 423 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 424 425 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 426 427 diag = a->diag; 428 idiag = a->idiag; 429 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 430 ierr = VecGetArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 431 432 if (flag & SOR_ZERO_INITIAL_GUESS) { 433 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 434 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 435 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 436 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 437 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 438 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 439 i2 = 5; 440 idiag += 25; 441 for (i=1; i<m; i++) { 442 v = aa + 25*ai[i]; 443 vi = aj + ai[i]; 444 nz = diag[i] - ai[i]; 445 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 446 while (nz--) { 447 idx = 5*(*vi++); 448 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 449 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 450 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 451 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 452 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 453 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 454 v += 25; 455 } 456 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 457 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 458 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 459 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 460 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 461 idiag += 25; 462 i2 += 5; 463 } 464 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 465 PetscLogFlops(25*(a->nz)); 466 } 467 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 468 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 469 i2 = 0; 470 mdiag = a->idiag+25*a->mbs; 471 for (i=0; i<m; i++) { 472 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 473 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 474 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 475 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 476 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 477 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 478 mdiag += 25; 479 i2 += 5; 480 } 481 PetscLogFlops(45*m); 482 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 483 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 484 } 485 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 486 idiag = a->idiag+25*a->mbs - 25; 487 i2 = 5*m - 5; 488 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 489 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 490 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 491 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 492 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 493 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 494 idiag -= 25; 495 i2 -= 5; 496 for (i=m-2; i>=0; i--) { 497 v = aa + 25*(diag[i]+1); 498 vi = aj + diag[i] + 1; 499 nz = ai[i+1] - diag[i] - 1; 500 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 501 while (nz--) { 502 idx = 5*(*vi++); 503 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 504 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 505 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 506 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 507 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 508 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 509 v += 25; 510 } 511 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 512 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 513 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 514 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 515 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 516 idiag -= 25; 517 i2 -= 5; 518 } 519 PetscLogFlops(25*(a->nz)); 520 } 521 } else { 522 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 523 } 524 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 525 ierr = VecRestoreArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 /* 530 Special version for Fun3d sequential benchmark 531 */ 532 #if defined(PETSC_HAVE_FORTRAN_CAPS) 533 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 534 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 535 #define matsetvaluesblocked4_ matsetvaluesblocked4 536 #endif 537 538 EXTERN_C_BEGIN 539 #undef __FUNCT__ 540 #define __FUNCT__ "matsetvaluesblocked4_" 541 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 542 { 543 Mat A = *AA; 544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 545 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 546 int *ai=a->i,*ailen=a->ilen; 547 int *aj=a->j,stepval; 548 const PetscScalar *value = v; 549 MatScalar *ap,*aa = a->a,*bap; 550 551 PetscFunctionBegin; 552 stepval = (n-1)*4; 553 for (k=0; k<m; k++) { /* loop over added rows */ 554 row = im[k]; 555 rp = aj + ai[row]; 556 ap = aa + 16*ai[row]; 557 nrow = ailen[row]; 558 low = 0; 559 for (l=0; l<n; l++) { /* loop over added columns */ 560 col = in[l]; 561 value = v + k*(stepval+4)*4 + l*4; 562 low = 0; high = nrow; 563 while (high-low > 7) { 564 t = (low+high)/2; 565 if (rp[t] > col) high = t; 566 else low = t; 567 } 568 for (i=low; i<high; i++) { 569 if (rp[i] > col) break; 570 if (rp[i] == col) { 571 bap = ap + 16*i; 572 for (ii=0; ii<4; ii++,value+=stepval) { 573 for (jj=ii; jj<16; jj+=4) { 574 bap[jj] += *value++; 575 } 576 } 577 goto noinsert2; 578 } 579 } 580 N = nrow++ - 1; 581 /* shift up all the later entries in this row */ 582 for (ii=N; ii>=i; ii--) { 583 rp[ii+1] = rp[ii]; 584 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 585 } 586 if (N >= i) { 587 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 588 } 589 rp[i] = col; 590 bap = ap + 16*i; 591 for (ii=0; ii<4; ii++,value+=stepval) { 592 for (jj=ii; jj<16; jj+=4) { 593 bap[jj] = *value++; 594 } 595 } 596 noinsert2:; 597 low = i; 598 } 599 ailen[row] = nrow; 600 } 601 } 602 EXTERN_C_END 603 604 #if defined(PETSC_HAVE_FORTRAN_CAPS) 605 #define matsetvalues4_ MATSETVALUES4 606 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 607 #define matsetvalues4_ matsetvalues4 608 #endif 609 610 EXTERN_C_BEGIN 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatSetValues4_" 613 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 614 { 615 Mat A = *AA; 616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 617 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 618 int *ai=a->i,*ailen=a->ilen; 619 int *aj=a->j,brow,bcol; 620 int ridx,cidx; 621 MatScalar *ap,value,*aa=a->a,*bap; 622 623 PetscFunctionBegin; 624 for (k=0; k<m; k++) { /* loop over added rows */ 625 row = im[k]; brow = row/4; 626 rp = aj + ai[brow]; 627 ap = aa + 16*ai[brow]; 628 nrow = ailen[brow]; 629 low = 0; 630 for (l=0; l<n; l++) { /* loop over added columns */ 631 col = in[l]; bcol = col/4; 632 ridx = row % 4; cidx = col % 4; 633 value = v[l + k*n]; 634 low = 0; high = nrow; 635 while (high-low > 7) { 636 t = (low+high)/2; 637 if (rp[t] > bcol) high = t; 638 else low = t; 639 } 640 for (i=low; i<high; i++) { 641 if (rp[i] > bcol) break; 642 if (rp[i] == bcol) { 643 bap = ap + 16*i + 4*cidx + ridx; 644 *bap += value; 645 goto noinsert1; 646 } 647 } 648 N = nrow++ - 1; 649 /* shift up all the later entries in this row */ 650 for (ii=N; ii>=i; ii--) { 651 rp[ii+1] = rp[ii]; 652 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 653 } 654 if (N>=i) { 655 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 656 } 657 rp[i] = bcol; 658 ap[16*i + 4*cidx + ridx] = value; 659 noinsert1:; 660 low = i; 661 } 662 ailen[brow] = nrow; 663 } 664 } 665 EXTERN_C_END 666 667 /* UGLY, ugly, ugly 668 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 669 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 670 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 671 converts the entries into single precision and then calls ..._MatScalar() to put them 672 into the single precision data structures. 673 */ 674 #if defined(PETSC_USE_MAT_SINGLE) 675 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 676 #else 677 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 678 #endif 679 680 #define CHUNKSIZE 10 681 682 /* 683 Checks for missing diagonals 684 */ 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 687 int MatMissingDiagonal_SeqBAIJ(Mat A) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 int *diag,*jj = a->j,i,ierr; 691 692 PetscFunctionBegin; 693 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 694 diag = a->diag; 695 for (i=0; i<a->mbs; i++) { 696 if (jj[diag[i]] != i) { 697 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 698 } 699 } 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 705 int MatMarkDiagonal_SeqBAIJ(Mat A) 706 { 707 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 708 int i,j,*diag,m = a->mbs,ierr; 709 710 PetscFunctionBegin; 711 if (a->diag) PetscFunctionReturn(0); 712 713 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 714 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 715 for (i=0; i<m; i++) { 716 diag[i] = a->i[i+1]; 717 for (j=a->i[i]; j<a->i[i+1]; j++) { 718 if (a->j[j] == i) { 719 diag[i] = j; 720 break; 721 } 722 } 723 } 724 a->diag = diag; 725 PetscFunctionReturn(0); 726 } 727 728 729 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 733 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 int ierr,n = a->mbs,i; 737 738 PetscFunctionBegin; 739 *nn = n; 740 if (!ia) PetscFunctionReturn(0); 741 if (symmetric) { 742 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 743 } else if (oshift == 1) { 744 /* temporarily add 1 to i and j indices */ 745 int nz = a->i[n]; 746 for (i=0; i<nz; i++) a->j[i]++; 747 for (i=0; i<n+1; i++) a->i[i]++; 748 *ia = a->i; *ja = a->j; 749 } else { 750 *ia = a->i; *ja = a->j; 751 } 752 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 758 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 759 { 760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 761 int i,n = a->mbs,ierr; 762 763 PetscFunctionBegin; 764 if (!ia) PetscFunctionReturn(0); 765 if (symmetric) { 766 ierr = PetscFree(*ia);CHKERRQ(ierr); 767 ierr = PetscFree(*ja);CHKERRQ(ierr); 768 } else if (oshift == 1) { 769 int nz = a->i[n]-1; 770 for (i=0; i<nz; i++) a->j[i]--; 771 for (i=0; i<n+1; i++) a->i[i]--; 772 } 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 778 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 779 { 780 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 781 782 PetscFunctionBegin; 783 *bs = baij->bs; 784 PetscFunctionReturn(0); 785 } 786 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatDestroy_SeqBAIJ" 790 int MatDestroy_SeqBAIJ(Mat A) 791 { 792 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 793 int ierr; 794 795 PetscFunctionBegin; 796 #if defined(PETSC_USE_LOG) 797 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 798 #endif 799 ierr = PetscFree(a->a);CHKERRQ(ierr); 800 if (!a->singlemalloc) { 801 ierr = PetscFree(a->i);CHKERRQ(ierr); 802 ierr = PetscFree(a->j);CHKERRQ(ierr); 803 } 804 if (a->row) { 805 ierr = ISDestroy(a->row);CHKERRQ(ierr); 806 } 807 if (a->col) { 808 ierr = ISDestroy(a->col);CHKERRQ(ierr); 809 } 810 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 811 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 812 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 813 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 814 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 815 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 816 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 817 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 818 #if defined(PETSC_USE_MAT_SINGLE) 819 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 820 #endif 821 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 822 823 ierr = PetscFree(a);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatSetOption_SeqBAIJ" 829 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 830 { 831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 832 833 PetscFunctionBegin; 834 switch (op) { 835 case MAT_ROW_ORIENTED: 836 a->roworiented = PETSC_TRUE; 837 break; 838 case MAT_COLUMN_ORIENTED: 839 a->roworiented = PETSC_FALSE; 840 break; 841 case MAT_COLUMNS_SORTED: 842 a->sorted = PETSC_TRUE; 843 break; 844 case MAT_COLUMNS_UNSORTED: 845 a->sorted = PETSC_FALSE; 846 break; 847 case MAT_KEEP_ZEROED_ROWS: 848 a->keepzeroedrows = PETSC_TRUE; 849 break; 850 case MAT_NO_NEW_NONZERO_LOCATIONS: 851 a->nonew = 1; 852 break; 853 case MAT_NEW_NONZERO_LOCATION_ERR: 854 a->nonew = -1; 855 break; 856 case MAT_NEW_NONZERO_ALLOCATION_ERR: 857 a->nonew = -2; 858 break; 859 case MAT_YES_NEW_NONZERO_LOCATIONS: 860 a->nonew = 0; 861 break; 862 case MAT_ROWS_SORTED: 863 case MAT_ROWS_UNSORTED: 864 case MAT_YES_NEW_DIAGONALS: 865 case MAT_IGNORE_OFF_PROC_ENTRIES: 866 case MAT_USE_HASH_TABLE: 867 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 868 break; 869 case MAT_NO_NEW_DIAGONALS: 870 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 871 case MAT_SYMMETRIC: 872 case MAT_STRUCTURALLY_SYMMETRIC: 873 case MAT_NOT_SYMMETRIC: 874 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 875 case MAT_HERMITIAN: 876 case MAT_NOT_HERMITIAN: 877 case MAT_SYMMETRY_ETERNAL: 878 case MAT_NOT_SYMMETRY_ETERNAL: 879 break; 880 default: 881 SETERRQ(PETSC_ERR_SUP,"unknown option"); 882 } 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatGetRow_SeqBAIJ" 888 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 889 { 890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 891 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 892 MatScalar *aa,*aa_i; 893 PetscScalar *v_i; 894 895 PetscFunctionBegin; 896 bs = a->bs; 897 ai = a->i; 898 aj = a->j; 899 aa = a->a; 900 bs2 = a->bs2; 901 902 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 903 904 bn = row/bs; /* Block number */ 905 bp = row % bs; /* Block Position */ 906 M = ai[bn+1] - ai[bn]; 907 *nz = bs*M; 908 909 if (v) { 910 *v = 0; 911 if (*nz) { 912 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 913 for (i=0; i<M; i++) { /* for each block in the block row */ 914 v_i = *v + i*bs; 915 aa_i = aa + bs2*(ai[bn] + i); 916 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 917 } 918 } 919 } 920 921 if (idx) { 922 *idx = 0; 923 if (*nz) { 924 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 925 for (i=0; i<M; i++) { /* for each block in the block row */ 926 idx_i = *idx + i*bs; 927 itmp = bs*aj[ai[bn] + i]; 928 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 929 } 930 } 931 } 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 937 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 938 { 939 int ierr; 940 941 PetscFunctionBegin; 942 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 943 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatTranspose_SeqBAIJ" 949 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 950 { 951 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 952 Mat C; 953 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 954 int *rows,*cols,bs2=a->bs2; 955 PetscScalar *array; 956 957 PetscFunctionBegin; 958 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 959 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 960 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 961 962 #if defined(PETSC_USE_MAT_SINGLE) 963 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 964 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 965 #else 966 array = a->a; 967 #endif 968 969 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 970 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 971 ierr = PetscFree(col);CHKERRQ(ierr); 972 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 973 cols = rows + bs; 974 for (i=0; i<mbs; i++) { 975 cols[0] = i*bs; 976 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 977 len = ai[i+1] - ai[i]; 978 for (j=0; j<len; j++) { 979 rows[0] = (*aj++)*bs; 980 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 981 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 982 array += bs2; 983 } 984 } 985 ierr = PetscFree(rows);CHKERRQ(ierr); 986 #if defined(PETSC_USE_MAT_SINGLE) 987 ierr = PetscFree(array); 988 #endif 989 990 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 992 993 if (B) { 994 *B = C; 995 } else { 996 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1003 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1004 { 1005 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1006 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1007 PetscScalar *aa; 1008 FILE *file; 1009 1010 PetscFunctionBegin; 1011 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1012 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 1013 col_lens[0] = MAT_FILE_COOKIE; 1014 1015 col_lens[1] = A->m; 1016 col_lens[2] = A->n; 1017 col_lens[3] = a->nz*bs2; 1018 1019 /* store lengths of each row and write (including header) to file */ 1020 count = 0; 1021 for (i=0; i<a->mbs; i++) { 1022 for (j=0; j<bs; j++) { 1023 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1024 } 1025 } 1026 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 1027 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1028 1029 /* store column indices (zero start index) */ 1030 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 1031 count = 0; 1032 for (i=0; i<a->mbs; i++) { 1033 for (j=0; j<bs; j++) { 1034 for (k=a->i[i]; k<a->i[i+1]; k++) { 1035 for (l=0; l<bs; l++) { 1036 jj[count++] = bs*a->j[k] + l; 1037 } 1038 } 1039 } 1040 } 1041 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 1042 ierr = PetscFree(jj);CHKERRQ(ierr); 1043 1044 /* store nonzero values */ 1045 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1046 count = 0; 1047 for (i=0; i<a->mbs; i++) { 1048 for (j=0; j<bs; j++) { 1049 for (k=a->i[i]; k<a->i[i+1]; k++) { 1050 for (l=0; l<bs; l++) { 1051 aa[count++] = a->a[bs2*k + l*bs + j]; 1052 } 1053 } 1054 } 1055 } 1056 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 1057 ierr = PetscFree(aa);CHKERRQ(ierr); 1058 1059 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1060 if (file) { 1061 fprintf(file,"-matload_block_size %d\n",a->bs); 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1068 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1069 { 1070 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1071 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 1072 PetscViewerFormat format; 1073 1074 PetscFunctionBegin; 1075 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1076 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1077 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1078 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1079 Mat aij; 1080 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1081 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1082 ierr = MatDestroy(aij);CHKERRQ(ierr); 1083 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1086 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1087 for (i=0; i<a->mbs; i++) { 1088 for (j=0; j<bs; j++) { 1089 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1090 for (k=a->i[i]; k<a->i[i+1]; k++) { 1091 for (l=0; l<bs; l++) { 1092 #if defined(PETSC_USE_COMPLEX) 1093 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1094 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 1095 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1096 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1097 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 1098 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1099 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1100 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1101 } 1102 #else 1103 if (a->a[bs2*k + l*bs + j] != 0.0) { 1104 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1105 } 1106 #endif 1107 } 1108 } 1109 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1110 } 1111 } 1112 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1113 } else { 1114 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1115 for (i=0; i<a->mbs; i++) { 1116 for (j=0; j<bs; j++) { 1117 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1118 for (k=a->i[i]; k<a->i[i+1]; k++) { 1119 for (l=0; l<bs; l++) { 1120 #if defined(PETSC_USE_COMPLEX) 1121 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1122 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 1123 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1124 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1125 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 1126 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1127 } else { 1128 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1129 } 1130 #else 1131 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1132 #endif 1133 } 1134 } 1135 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1136 } 1137 } 1138 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1139 } 1140 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1146 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1147 { 1148 Mat A = (Mat) Aa; 1149 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1150 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1151 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1152 MatScalar *aa; 1153 PetscViewer viewer; 1154 1155 PetscFunctionBegin; 1156 1157 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1158 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1159 1160 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1161 1162 /* loop over matrix elements drawing boxes */ 1163 color = PETSC_DRAW_BLUE; 1164 for (i=0,row=0; i<mbs; i++,row+=bs) { 1165 for (j=a->i[i]; j<a->i[i+1]; j++) { 1166 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1167 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1168 aa = a->a + j*bs2; 1169 for (k=0; k<bs; k++) { 1170 for (l=0; l<bs; l++) { 1171 if (PetscRealPart(*aa++) >= 0.) continue; 1172 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1173 } 1174 } 1175 } 1176 } 1177 color = PETSC_DRAW_CYAN; 1178 for (i=0,row=0; i<mbs; i++,row+=bs) { 1179 for (j=a->i[i]; j<a->i[i+1]; j++) { 1180 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1181 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1182 aa = a->a + j*bs2; 1183 for (k=0; k<bs; k++) { 1184 for (l=0; l<bs; l++) { 1185 if (PetscRealPart(*aa++) != 0.) continue; 1186 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1187 } 1188 } 1189 } 1190 } 1191 1192 color = PETSC_DRAW_RED; 1193 for (i=0,row=0; i<mbs; i++,row+=bs) { 1194 for (j=a->i[i]; j<a->i[i+1]; j++) { 1195 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1196 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1197 aa = a->a + j*bs2; 1198 for (k=0; k<bs; k++) { 1199 for (l=0; l<bs; l++) { 1200 if (PetscRealPart(*aa++) <= 0.) continue; 1201 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1202 } 1203 } 1204 } 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1211 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1212 { 1213 int ierr; 1214 PetscReal xl,yl,xr,yr,w,h; 1215 PetscDraw draw; 1216 PetscTruth isnull; 1217 1218 PetscFunctionBegin; 1219 1220 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1221 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1222 1223 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1224 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1225 xr += w; yr += h; xl = -w; yl = -h; 1226 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1227 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1228 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatView_SeqBAIJ" 1234 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1235 { 1236 int ierr; 1237 PetscTruth isascii,isbinary,isdraw; 1238 1239 PetscFunctionBegin; 1240 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1241 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1242 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1243 if (isascii){ 1244 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1245 } else if (isbinary) { 1246 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1247 } else if (isdraw) { 1248 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1249 } else { 1250 Mat B; 1251 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1252 ierr = MatView(B,viewer);CHKERRQ(ierr); 1253 ierr = MatDestroy(B);CHKERRQ(ierr); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1261 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 1262 { 1263 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1264 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1265 int *ai = a->i,*ailen = a->ilen; 1266 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1267 MatScalar *ap,*aa = a->a,zero = 0.0; 1268 1269 PetscFunctionBegin; 1270 for (k=0; k<m; k++) { /* loop over rows */ 1271 row = im[k]; brow = row/bs; 1272 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1273 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 1274 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1275 nrow = ailen[brow]; 1276 for (l=0; l<n; l++) { /* loop over columns */ 1277 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1278 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 1279 col = in[l] ; 1280 bcol = col/bs; 1281 cidx = col%bs; 1282 ridx = row%bs; 1283 high = nrow; 1284 low = 0; /* assume unsorted */ 1285 while (high-low > 5) { 1286 t = (low+high)/2; 1287 if (rp[t] > bcol) high = t; 1288 else low = t; 1289 } 1290 for (i=low; i<high; i++) { 1291 if (rp[i] > bcol) break; 1292 if (rp[i] == bcol) { 1293 *v++ = ap[bs2*i+bs*cidx+ridx]; 1294 goto finished; 1295 } 1296 } 1297 *v++ = zero; 1298 finished:; 1299 } 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #if defined(PETSC_USE_MAT_SINGLE) 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1307 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 1308 { 1309 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1310 int ierr,i,N = m*n*b->bs2; 1311 MatScalar *vsingle; 1312 1313 PetscFunctionBegin; 1314 if (N > b->setvalueslen) { 1315 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1316 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1317 b->setvalueslen = N; 1318 } 1319 vsingle = b->setvaluescopy; 1320 for (i=0; i<N; i++) { 1321 vsingle[i] = v[i]; 1322 } 1323 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 #endif 1327 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1331 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 1332 { 1333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1334 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1335 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1336 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 1337 PetscTruth roworiented=a->roworiented; 1338 const MatScalar *value = v; 1339 MatScalar *ap,*aa = a->a,*bap; 1340 1341 PetscFunctionBegin; 1342 if (roworiented) { 1343 stepval = (n-1)*bs; 1344 } else { 1345 stepval = (m-1)*bs; 1346 } 1347 for (k=0; k<m; k++) { /* loop over added rows */ 1348 row = im[k]; 1349 if (row < 0) continue; 1350 #if defined(PETSC_USE_BOPT_g) 1351 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 1352 #endif 1353 rp = aj + ai[row]; 1354 ap = aa + bs2*ai[row]; 1355 rmax = imax[row]; 1356 nrow = ailen[row]; 1357 low = 0; 1358 for (l=0; l<n; l++) { /* loop over added columns */ 1359 if (in[l] < 0) continue; 1360 #if defined(PETSC_USE_BOPT_g) 1361 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 1362 #endif 1363 col = in[l]; 1364 if (roworiented) { 1365 value = v + k*(stepval+bs)*bs + l*bs; 1366 } else { 1367 value = v + l*(stepval+bs)*bs + k*bs; 1368 } 1369 if (!sorted) low = 0; high = nrow; 1370 while (high-low > 7) { 1371 t = (low+high)/2; 1372 if (rp[t] > col) high = t; 1373 else low = t; 1374 } 1375 for (i=low; i<high; i++) { 1376 if (rp[i] > col) break; 1377 if (rp[i] == col) { 1378 bap = ap + bs2*i; 1379 if (roworiented) { 1380 if (is == ADD_VALUES) { 1381 for (ii=0; ii<bs; ii++,value+=stepval) { 1382 for (jj=ii; jj<bs2; jj+=bs) { 1383 bap[jj] += *value++; 1384 } 1385 } 1386 } else { 1387 for (ii=0; ii<bs; ii++,value+=stepval) { 1388 for (jj=ii; jj<bs2; jj+=bs) { 1389 bap[jj] = *value++; 1390 } 1391 } 1392 } 1393 } else { 1394 if (is == ADD_VALUES) { 1395 for (ii=0; ii<bs; ii++,value+=stepval) { 1396 for (jj=0; jj<bs; jj++) { 1397 *bap++ += *value++; 1398 } 1399 } 1400 } else { 1401 for (ii=0; ii<bs; ii++,value+=stepval) { 1402 for (jj=0; jj<bs; jj++) { 1403 *bap++ = *value++; 1404 } 1405 } 1406 } 1407 } 1408 goto noinsert2; 1409 } 1410 } 1411 if (nonew == 1) goto noinsert2; 1412 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1413 if (nrow >= rmax) { 1414 /* there is no extra room in row, therefore enlarge */ 1415 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1416 MatScalar *new_a; 1417 1418 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1419 1420 /* malloc new storage space */ 1421 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1422 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1423 new_j = (int*)(new_a + bs2*new_nz); 1424 new_i = new_j + new_nz; 1425 1426 /* copy over old data into new slots */ 1427 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1428 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1429 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 1430 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1431 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 1432 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1433 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1434 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1435 /* free up old matrix storage */ 1436 ierr = PetscFree(a->a);CHKERRQ(ierr); 1437 if (!a->singlemalloc) { 1438 ierr = PetscFree(a->i);CHKERRQ(ierr); 1439 ierr = PetscFree(a->j);CHKERRQ(ierr); 1440 } 1441 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1442 a->singlemalloc = PETSC_TRUE; 1443 1444 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1445 rmax = imax[row] = imax[row] + CHUNKSIZE; 1446 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1447 a->maxnz += bs2*CHUNKSIZE; 1448 a->reallocs++; 1449 a->nz++; 1450 } 1451 N = nrow++ - 1; 1452 /* shift up all the later entries in this row */ 1453 for (ii=N; ii>=i; ii--) { 1454 rp[ii+1] = rp[ii]; 1455 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1456 } 1457 if (N >= i) { 1458 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1459 } 1460 rp[i] = col; 1461 bap = ap + bs2*i; 1462 if (roworiented) { 1463 for (ii=0; ii<bs; ii++,value+=stepval) { 1464 for (jj=ii; jj<bs2; jj+=bs) { 1465 bap[jj] = *value++; 1466 } 1467 } 1468 } else { 1469 for (ii=0; ii<bs; ii++,value+=stepval) { 1470 for (jj=0; jj<bs; jj++) { 1471 *bap++ = *value++; 1472 } 1473 } 1474 } 1475 noinsert2:; 1476 low = i; 1477 } 1478 ailen[row] = nrow; 1479 } 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1485 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1486 { 1487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1488 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1489 int m = A->m,*ip,N,*ailen = a->ilen; 1490 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 1491 MatScalar *aa = a->a,*ap; 1492 1493 PetscFunctionBegin; 1494 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1495 1496 if (m) rmax = ailen[0]; 1497 for (i=1; i<mbs; i++) { 1498 /* move each row back by the amount of empty slots (fshift) before it*/ 1499 fshift += imax[i-1] - ailen[i-1]; 1500 rmax = PetscMax(rmax,ailen[i]); 1501 if (fshift) { 1502 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1503 N = ailen[i]; 1504 for (j=0; j<N; j++) { 1505 ip[j-fshift] = ip[j]; 1506 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1507 } 1508 } 1509 ai[i] = ai[i-1] + ailen[i-1]; 1510 } 1511 if (mbs) { 1512 fshift += imax[mbs-1] - ailen[mbs-1]; 1513 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1514 } 1515 /* reset ilen and imax for each row */ 1516 for (i=0; i<mbs; i++) { 1517 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1518 } 1519 a->nz = ai[mbs]; 1520 1521 /* diagonals may have moved, so kill the diagonal pointers */ 1522 a->idiagvalid = PETSC_FALSE; 1523 if (fshift && a->diag) { 1524 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1525 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1526 a->diag = 0; 1527 } 1528 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1529 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1530 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1531 a->reallocs = 0; 1532 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1533 1534 PetscFunctionReturn(0); 1535 } 1536 1537 1538 1539 /* 1540 This function returns an array of flags which indicate the locations of contiguous 1541 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1542 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1543 Assume: sizes should be long enough to hold all the values. 1544 */ 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1547 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1548 { 1549 int i,j,k,row; 1550 PetscTruth flg; 1551 1552 PetscFunctionBegin; 1553 for (i=0,j=0; i<n; j++) { 1554 row = idx[i]; 1555 if (row%bs!=0) { /* Not the begining of a block */ 1556 sizes[j] = 1; 1557 i++; 1558 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1559 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1560 i++; 1561 } else { /* Begining of the block, so check if the complete block exists */ 1562 flg = PETSC_TRUE; 1563 for (k=1; k<bs; k++) { 1564 if (row+k != idx[i+k]) { /* break in the block */ 1565 flg = PETSC_FALSE; 1566 break; 1567 } 1568 } 1569 if (flg == PETSC_TRUE) { /* No break in the bs */ 1570 sizes[j] = bs; 1571 i+= bs; 1572 } else { 1573 sizes[j] = 1; 1574 i++; 1575 } 1576 } 1577 } 1578 *bs_max = j; 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1584 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1585 { 1586 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1587 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1588 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1589 PetscScalar zero = 0.0; 1590 MatScalar *aa; 1591 1592 PetscFunctionBegin; 1593 /* Make a copy of the IS and sort it */ 1594 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1595 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1596 1597 /* allocate memory for rows,sizes */ 1598 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1599 sizes = rows + is_n; 1600 1601 /* copy IS values to rows, and sort them */ 1602 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1603 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1604 if (baij->keepzeroedrows) { 1605 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1606 bs_max = is_n; 1607 } else { 1608 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1609 } 1610 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1611 1612 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1613 row = rows[j]; 1614 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1615 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1616 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1617 if (sizes[i] == bs && !baij->keepzeroedrows) { 1618 if (diag) { 1619 if (baij->ilen[row/bs] > 0) { 1620 baij->ilen[row/bs] = 1; 1621 baij->j[baij->i[row/bs]] = row/bs; 1622 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1623 } 1624 /* Now insert all the diagonal values for this bs */ 1625 for (k=0; k<bs; k++) { 1626 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1627 } 1628 } else { /* (!diag) */ 1629 baij->ilen[row/bs] = 0; 1630 } /* end (!diag) */ 1631 } else { /* (sizes[i] != bs) */ 1632 #if defined (PETSC_USE_DEBUG) 1633 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1634 #endif 1635 for (k=0; k<count; k++) { 1636 aa[0] = zero; 1637 aa += bs; 1638 } 1639 if (diag) { 1640 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1641 } 1642 } 1643 } 1644 1645 ierr = PetscFree(rows);CHKERRQ(ierr); 1646 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1652 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1653 { 1654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1655 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1656 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1657 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1658 int ridx,cidx,bs2=a->bs2,ierr; 1659 PetscTruth roworiented=a->roworiented; 1660 MatScalar *ap,value,*aa=a->a,*bap; 1661 1662 PetscFunctionBegin; 1663 for (k=0; k<m; k++) { /* loop over added rows */ 1664 row = im[k]; brow = row/bs; 1665 if (row < 0) continue; 1666 #if defined(PETSC_USE_BOPT_g) 1667 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1668 #endif 1669 rp = aj + ai[brow]; 1670 ap = aa + bs2*ai[brow]; 1671 rmax = imax[brow]; 1672 nrow = ailen[brow]; 1673 low = 0; 1674 for (l=0; l<n; l++) { /* loop over added columns */ 1675 if (in[l] < 0) continue; 1676 #if defined(PETSC_USE_BOPT_g) 1677 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1678 #endif 1679 col = in[l]; bcol = col/bs; 1680 ridx = row % bs; cidx = col % bs; 1681 if (roworiented) { 1682 value = v[l + k*n]; 1683 } else { 1684 value = v[k + l*m]; 1685 } 1686 if (!sorted) low = 0; high = nrow; 1687 while (high-low > 7) { 1688 t = (low+high)/2; 1689 if (rp[t] > bcol) high = t; 1690 else low = t; 1691 } 1692 for (i=low; i<high; i++) { 1693 if (rp[i] > bcol) break; 1694 if (rp[i] == bcol) { 1695 bap = ap + bs2*i + bs*cidx + ridx; 1696 if (is == ADD_VALUES) *bap += value; 1697 else *bap = value; 1698 goto noinsert1; 1699 } 1700 } 1701 if (nonew == 1) goto noinsert1; 1702 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1703 if (nrow >= rmax) { 1704 /* there is no extra room in row, therefore enlarge */ 1705 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1706 MatScalar *new_a; 1707 1708 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1709 1710 /* Malloc new storage space */ 1711 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1712 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1713 new_j = (int*)(new_a + bs2*new_nz); 1714 new_i = new_j + new_nz; 1715 1716 /* copy over old data into new slots */ 1717 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1718 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1719 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1720 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1721 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1722 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1723 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1724 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1725 /* free up old matrix storage */ 1726 ierr = PetscFree(a->a);CHKERRQ(ierr); 1727 if (!a->singlemalloc) { 1728 ierr = PetscFree(a->i);CHKERRQ(ierr); 1729 ierr = PetscFree(a->j);CHKERRQ(ierr); 1730 } 1731 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1732 a->singlemalloc = PETSC_TRUE; 1733 1734 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1735 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1736 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1737 a->maxnz += bs2*CHUNKSIZE; 1738 a->reallocs++; 1739 a->nz++; 1740 } 1741 N = nrow++ - 1; 1742 /* shift up all the later entries in this row */ 1743 for (ii=N; ii>=i; ii--) { 1744 rp[ii+1] = rp[ii]; 1745 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1746 } 1747 if (N>=i) { 1748 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1749 } 1750 rp[i] = bcol; 1751 ap[bs2*i + bs*cidx + ridx] = value; 1752 noinsert1:; 1753 low = i; 1754 } 1755 ailen[brow] = nrow; 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1763 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1764 { 1765 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1766 Mat outA; 1767 int ierr; 1768 PetscTruth row_identity,col_identity; 1769 1770 PetscFunctionBegin; 1771 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1772 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1773 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1774 if (!row_identity || !col_identity) { 1775 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1776 } 1777 1778 outA = inA; 1779 inA->factor = FACTOR_LU; 1780 1781 if (!a->diag) { 1782 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1783 } 1784 1785 a->row = row; 1786 a->col = col; 1787 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1788 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1789 1790 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1791 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1792 PetscLogObjectParent(inA,a->icol); 1793 1794 /* 1795 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1796 for ILU(0) factorization with natural ordering 1797 */ 1798 if (a->bs < 8) { 1799 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1800 } else { 1801 if (!a->solve_work) { 1802 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1803 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1804 } 1805 } 1806 1807 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1808 1809 PetscFunctionReturn(0); 1810 } 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1813 int MatPrintHelp_SeqBAIJ(Mat A) 1814 { 1815 static PetscTruth called = PETSC_FALSE; 1816 MPI_Comm comm = A->comm; 1817 int ierr; 1818 1819 PetscFunctionBegin; 1820 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1821 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1822 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 EXTERN_C_BEGIN 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1829 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1830 { 1831 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1832 int i,nz,nbs; 1833 1834 PetscFunctionBegin; 1835 nz = baij->maxnz/baij->bs2; 1836 nbs = baij->nbs; 1837 for (i=0; i<nz; i++) { 1838 baij->j[i] = indices[i]; 1839 } 1840 baij->nz = nz; 1841 for (i=0; i<nbs; i++) { 1842 baij->ilen[i] = baij->imax[i]; 1843 } 1844 1845 PetscFunctionReturn(0); 1846 } 1847 EXTERN_C_END 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1851 /*@ 1852 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1853 in the matrix. 1854 1855 Input Parameters: 1856 + mat - the SeqBAIJ matrix 1857 - indices - the column indices 1858 1859 Level: advanced 1860 1861 Notes: 1862 This can be called if you have precomputed the nonzero structure of the 1863 matrix and want to provide it to the matrix object to improve the performance 1864 of the MatSetValues() operation. 1865 1866 You MUST have set the correct numbers of nonzeros per row in the call to 1867 MatCreateSeqBAIJ(). 1868 1869 MUST be called before any calls to MatSetValues(); 1870 1871 @*/ 1872 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1873 { 1874 int ierr,(*f)(Mat,int *); 1875 1876 PetscFunctionBegin; 1877 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1878 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1879 if (f) { 1880 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1881 } else { 1882 SETERRQ(1,"Wrong type of matrix to set column indices"); 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1889 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1890 { 1891 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1892 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1893 PetscReal atmp; 1894 PetscScalar *x,zero = 0.0; 1895 MatScalar *aa; 1896 int ncols,brow,krow,kcol; 1897 1898 PetscFunctionBegin; 1899 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1900 bs = a->bs; 1901 aa = a->a; 1902 ai = a->i; 1903 aj = a->j; 1904 mbs = a->mbs; 1905 1906 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1907 ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 1908 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1909 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1910 for (i=0; i<mbs; i++) { 1911 ncols = ai[1] - ai[0]; ai++; 1912 brow = bs*i; 1913 for (j=0; j<ncols; j++){ 1914 /* bcol = bs*(*aj); */ 1915 for (kcol=0; kcol<bs; kcol++){ 1916 for (krow=0; krow<bs; krow++){ 1917 atmp = PetscAbsScalar(*aa); aa++; 1918 row = brow + krow; /* row index */ 1919 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1920 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1921 } 1922 } 1923 aj++; 1924 } 1925 } 1926 ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1932 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1933 { 1934 int ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1943 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1944 { 1945 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1946 PetscFunctionBegin; 1947 *array = a->a; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1953 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1954 { 1955 PetscFunctionBegin; 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #include "petscblaslapack.h" 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1962 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1963 { 1964 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1965 int ierr,one=1,i,bs=y->bs,j,bs2; 1966 1967 PetscFunctionBegin; 1968 if (str == SAME_NONZERO_PATTERN) { 1969 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1970 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1971 if (y->xtoy && y->XtoY != X) { 1972 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1973 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1974 } 1975 if (!y->xtoy) { /* get xtoy */ 1976 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1977 y->XtoY = X; 1978 } 1979 bs2 = bs*bs; 1980 for (i=0; i<x->nz; i++) { 1981 j = 0; 1982 while (j < bs2){ 1983 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1984 j++; 1985 } 1986 } 1987 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1988 } else { 1989 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1990 } 1991 PetscFunctionReturn(0); 1992 } 1993 1994 /* -------------------------------------------------------------------*/ 1995 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1996 MatGetRow_SeqBAIJ, 1997 MatRestoreRow_SeqBAIJ, 1998 MatMult_SeqBAIJ_N, 1999 /* 4*/ MatMultAdd_SeqBAIJ_N, 2000 MatMultTranspose_SeqBAIJ, 2001 MatMultTransposeAdd_SeqBAIJ, 2002 MatSolve_SeqBAIJ_N, 2003 0, 2004 0, 2005 /*10*/ 0, 2006 MatLUFactor_SeqBAIJ, 2007 0, 2008 0, 2009 MatTranspose_SeqBAIJ, 2010 /*15*/ MatGetInfo_SeqBAIJ, 2011 MatEqual_SeqBAIJ, 2012 MatGetDiagonal_SeqBAIJ, 2013 MatDiagonalScale_SeqBAIJ, 2014 MatNorm_SeqBAIJ, 2015 /*20*/ 0, 2016 MatAssemblyEnd_SeqBAIJ, 2017 0, 2018 MatSetOption_SeqBAIJ, 2019 MatZeroEntries_SeqBAIJ, 2020 /*25*/ MatZeroRows_SeqBAIJ, 2021 MatLUFactorSymbolic_SeqBAIJ, 2022 MatLUFactorNumeric_SeqBAIJ_N, 2023 0, 2024 0, 2025 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2026 MatILUFactorSymbolic_SeqBAIJ, 2027 0, 2028 MatGetArray_SeqBAIJ, 2029 MatRestoreArray_SeqBAIJ, 2030 /*35*/ MatDuplicate_SeqBAIJ, 2031 0, 2032 0, 2033 MatILUFactor_SeqBAIJ, 2034 0, 2035 /*40*/ MatAXPY_SeqBAIJ, 2036 MatGetSubMatrices_SeqBAIJ, 2037 MatIncreaseOverlap_SeqBAIJ, 2038 MatGetValues_SeqBAIJ, 2039 0, 2040 /*45*/ MatPrintHelp_SeqBAIJ, 2041 MatScale_SeqBAIJ, 2042 0, 2043 0, 2044 0, 2045 /*50*/ MatGetBlockSize_SeqBAIJ, 2046 MatGetRowIJ_SeqBAIJ, 2047 MatRestoreRowIJ_SeqBAIJ, 2048 0, 2049 0, 2050 /*55*/ 0, 2051 0, 2052 0, 2053 0, 2054 MatSetValuesBlocked_SeqBAIJ, 2055 /*60*/ MatGetSubMatrix_SeqBAIJ, 2056 MatDestroy_SeqBAIJ, 2057 MatView_SeqBAIJ, 2058 MatGetPetscMaps_Petsc, 2059 0, 2060 /*65*/ 0, 2061 0, 2062 0, 2063 0, 2064 0, 2065 /*70*/ MatGetRowMax_SeqBAIJ, 2066 MatConvert_Basic, 2067 0, 2068 0, 2069 0, 2070 /*75*/ 0, 2071 0, 2072 0, 2073 0, 2074 0, 2075 /*80*/ 0, 2076 0, 2077 0, 2078 0, 2079 /*85*/ MatLoad_SeqBAIJ, 2080 0, 2081 0, 2082 0, 2083 0 2084 }; 2085 2086 EXTERN_C_BEGIN 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2089 int MatStoreValues_SeqBAIJ(Mat mat) 2090 { 2091 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2092 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 2093 int ierr; 2094 2095 PetscFunctionBegin; 2096 if (aij->nonew != 1) { 2097 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2098 } 2099 2100 /* allocate space for values if not already there */ 2101 if (!aij->saved_values) { 2102 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2103 } 2104 2105 /* copy values over */ 2106 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2107 PetscFunctionReturn(0); 2108 } 2109 EXTERN_C_END 2110 2111 EXTERN_C_BEGIN 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2114 int MatRetrieveValues_SeqBAIJ(Mat mat) 2115 { 2116 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2117 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 2118 2119 PetscFunctionBegin; 2120 if (aij->nonew != 1) { 2121 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2122 } 2123 if (!aij->saved_values) { 2124 SETERRQ(1,"Must call MatStoreValues(A);first"); 2125 } 2126 2127 /* copy values over */ 2128 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 EXTERN_C_END 2132 2133 EXTERN_C_BEGIN 2134 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2135 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2136 EXTERN_C_END 2137 2138 EXTERN_C_BEGIN 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2141 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 2142 { 2143 Mat_SeqBAIJ *b; 2144 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 2145 PetscTruth flg; 2146 2147 PetscFunctionBegin; 2148 2149 B->preallocated = PETSC_TRUE; 2150 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2151 if (nnz && newbs != bs) { 2152 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 2153 } 2154 bs = newbs; 2155 2156 mbs = B->m/bs; 2157 nbs = B->n/bs; 2158 bs2 = bs*bs; 2159 2160 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2161 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2162 } 2163 2164 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2165 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2166 if (nnz) { 2167 for (i=0; i<mbs; i++) { 2168 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2169 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2170 } 2171 } 2172 2173 b = (Mat_SeqBAIJ*)B->data; 2174 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2175 B->ops->solve = MatSolve_SeqBAIJ_Update; 2176 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2177 if (!flg) { 2178 switch (bs) { 2179 case 1: 2180 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2181 B->ops->mult = MatMult_SeqBAIJ_1; 2182 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2183 break; 2184 case 2: 2185 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2186 B->ops->mult = MatMult_SeqBAIJ_2; 2187 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2188 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2189 break; 2190 case 3: 2191 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2192 B->ops->mult = MatMult_SeqBAIJ_3; 2193 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2194 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2195 break; 2196 case 4: 2197 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2198 B->ops->mult = MatMult_SeqBAIJ_4; 2199 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2200 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2201 break; 2202 case 5: 2203 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2204 B->ops->mult = MatMult_SeqBAIJ_5; 2205 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2206 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2207 break; 2208 case 6: 2209 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2210 B->ops->mult = MatMult_SeqBAIJ_6; 2211 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2212 break; 2213 case 7: 2214 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2215 B->ops->mult = MatMult_SeqBAIJ_7; 2216 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2217 break; 2218 default: 2219 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2220 B->ops->mult = MatMult_SeqBAIJ_N; 2221 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2222 break; 2223 } 2224 } 2225 b->bs = bs; 2226 b->mbs = mbs; 2227 b->nbs = nbs; 2228 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2229 if (!nnz) { 2230 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2231 else if (nz <= 0) nz = 1; 2232 for (i=0; i<mbs; i++) b->imax[i] = nz; 2233 nz = nz*mbs; 2234 } else { 2235 nz = 0; 2236 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2237 } 2238 2239 /* allocate the matrix space */ 2240 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2241 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2242 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2243 b->j = (int*)(b->a + nz*bs2); 2244 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2245 b->i = b->j + nz; 2246 b->singlemalloc = PETSC_TRUE; 2247 2248 b->i[0] = 0; 2249 for (i=1; i<mbs+1; i++) { 2250 b->i[i] = b->i[i-1] + b->imax[i-1]; 2251 } 2252 2253 /* b->ilen will count nonzeros in each block row so far. */ 2254 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2255 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2256 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2257 2258 b->bs = bs; 2259 b->bs2 = bs2; 2260 b->mbs = mbs; 2261 b->nz = 0; 2262 b->maxnz = nz*bs2; 2263 B->info.nz_unneeded = (PetscReal)b->maxnz; 2264 PetscFunctionReturn(0); 2265 } 2266 EXTERN_C_END 2267 2268 /*MC 2269 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2270 block sparse compressed row format. 2271 2272 Options Database Keys: 2273 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2274 2275 Level: beginner 2276 2277 .seealso: MatCreateSeqBAIJ 2278 M*/ 2279 2280 EXTERN_C_BEGIN 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatCreate_SeqBAIJ" 2283 int MatCreate_SeqBAIJ(Mat B) 2284 { 2285 int ierr,size; 2286 Mat_SeqBAIJ *b; 2287 2288 PetscFunctionBegin; 2289 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2290 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2291 2292 B->m = B->M = PetscMax(B->m,B->M); 2293 B->n = B->N = PetscMax(B->n,B->N); 2294 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2295 B->data = (void*)b; 2296 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2297 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2298 B->factor = 0; 2299 B->lupivotthreshold = 1.0; 2300 B->mapping = 0; 2301 b->row = 0; 2302 b->col = 0; 2303 b->icol = 0; 2304 b->reallocs = 0; 2305 b->saved_values = 0; 2306 #if defined(PETSC_USE_MAT_SINGLE) 2307 b->setvalueslen = 0; 2308 b->setvaluescopy = PETSC_NULL; 2309 #endif 2310 2311 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2312 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2313 2314 b->sorted = PETSC_FALSE; 2315 b->roworiented = PETSC_TRUE; 2316 b->nonew = 0; 2317 b->diag = 0; 2318 b->solve_work = 0; 2319 b->mult_work = 0; 2320 B->spptr = 0; 2321 B->info.nz_unneeded = (PetscReal)b->maxnz; 2322 b->keepzeroedrows = PETSC_FALSE; 2323 b->xtoy = 0; 2324 b->XtoY = 0; 2325 2326 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2327 "MatStoreValues_SeqBAIJ", 2328 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2329 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2330 "MatRetrieveValues_SeqBAIJ", 2331 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2332 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2333 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2334 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2335 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2336 "MatConvert_SeqBAIJ_SeqAIJ", 2337 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2338 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2339 "MatConvert_SeqBAIJ_SeqSBAIJ", 2340 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2342 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2343 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2344 PetscFunctionReturn(0); 2345 } 2346 EXTERN_C_END 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2350 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2351 { 2352 Mat C; 2353 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2354 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 2355 2356 PetscFunctionBegin; 2357 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2358 2359 *B = 0; 2360 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2361 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2362 c = (Mat_SeqBAIJ*)C->data; 2363 2364 C->M = A->M; 2365 C->N = A->N; 2366 c->bs = a->bs; 2367 c->bs2 = a->bs2; 2368 c->mbs = a->mbs; 2369 c->nbs = a->nbs; 2370 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2371 2372 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 2373 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 2374 for (i=0; i<mbs; i++) { 2375 c->imax[i] = a->imax[i]; 2376 c->ilen[i] = a->ilen[i]; 2377 } 2378 2379 /* allocate the matrix space */ 2380 c->singlemalloc = PETSC_TRUE; 2381 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 2382 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2383 c->j = (int*)(c->a + nz*bs2); 2384 c->i = c->j + nz; 2385 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 2386 if (mbs > 0) { 2387 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 2388 if (cpvalues == MAT_COPY_VALUES) { 2389 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2390 } else { 2391 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2392 } 2393 } 2394 2395 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2396 c->sorted = a->sorted; 2397 c->roworiented = a->roworiented; 2398 c->nonew = a->nonew; 2399 2400 if (a->diag) { 2401 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 2402 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 2403 for (i=0; i<mbs; i++) { 2404 c->diag[i] = a->diag[i]; 2405 } 2406 } else c->diag = 0; 2407 c->nz = a->nz; 2408 c->maxnz = a->maxnz; 2409 c->solve_work = 0; 2410 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2411 c->mult_work = 0; 2412 C->preallocated = PETSC_TRUE; 2413 C->assembled = PETSC_TRUE; 2414 *B = C; 2415 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 #undef __FUNCT__ 2420 #define __FUNCT__ "MatLoad_SeqBAIJ" 2421 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2422 { 2423 Mat_SeqBAIJ *a; 2424 Mat B; 2425 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 2426 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2427 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2428 int *masked,nmask,tmp,bs2,ishift; 2429 PetscScalar *aa; 2430 MPI_Comm comm = ((PetscObject)viewer)->comm; 2431 2432 PetscFunctionBegin; 2433 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2434 bs2 = bs*bs; 2435 2436 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2437 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2438 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2439 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2440 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2441 M = header[1]; N = header[2]; nz = header[3]; 2442 2443 if (header[3] < 0) { 2444 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2445 } 2446 2447 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2448 2449 /* 2450 This code adds extra rows to make sure the number of rows is 2451 divisible by the blocksize 2452 */ 2453 mbs = M/bs; 2454 extra_rows = bs - M + bs*(mbs); 2455 if (extra_rows == bs) extra_rows = 0; 2456 else mbs++; 2457 if (extra_rows) { 2458 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2459 } 2460 2461 /* read in row lengths */ 2462 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2463 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2464 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2465 2466 /* read in column indices */ 2467 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 2468 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2469 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2470 2471 /* loop over row lengths determining block row lengths */ 2472 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 2473 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 2474 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 2475 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 2476 masked = mask + mbs; 2477 rowcount = 0; nzcount = 0; 2478 for (i=0; i<mbs; i++) { 2479 nmask = 0; 2480 for (j=0; j<bs; j++) { 2481 kmax = rowlengths[rowcount]; 2482 for (k=0; k<kmax; k++) { 2483 tmp = jj[nzcount++]/bs; 2484 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2485 } 2486 rowcount++; 2487 } 2488 browlengths[i] += nmask; 2489 /* zero out the mask elements we set */ 2490 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2491 } 2492 2493 /* create our matrix */ 2494 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2495 ierr = MatSetType(B,type);CHKERRQ(ierr); 2496 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2497 a = (Mat_SeqBAIJ*)B->data; 2498 2499 /* set matrix "i" values */ 2500 a->i[0] = 0; 2501 for (i=1; i<= mbs; i++) { 2502 a->i[i] = a->i[i-1] + browlengths[i-1]; 2503 a->ilen[i-1] = browlengths[i-1]; 2504 } 2505 a->nz = 0; 2506 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2507 2508 /* read in nonzero values */ 2509 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2510 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2511 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2512 2513 /* set "a" and "j" values into matrix */ 2514 nzcount = 0; jcount = 0; 2515 for (i=0; i<mbs; i++) { 2516 nzcountb = nzcount; 2517 nmask = 0; 2518 for (j=0; j<bs; j++) { 2519 kmax = rowlengths[i*bs+j]; 2520 for (k=0; k<kmax; k++) { 2521 tmp = jj[nzcount++]/bs; 2522 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2523 } 2524 } 2525 /* sort the masked values */ 2526 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2527 2528 /* set "j" values into matrix */ 2529 maskcount = 1; 2530 for (j=0; j<nmask; j++) { 2531 a->j[jcount++] = masked[j]; 2532 mask[masked[j]] = maskcount++; 2533 } 2534 /* set "a" values into matrix */ 2535 ishift = bs2*a->i[i]; 2536 for (j=0; j<bs; j++) { 2537 kmax = rowlengths[i*bs+j]; 2538 for (k=0; k<kmax; k++) { 2539 tmp = jj[nzcountb]/bs ; 2540 block = mask[tmp] - 1; 2541 point = jj[nzcountb] - bs*tmp; 2542 idx = ishift + bs2*block + j + bs*point; 2543 a->a[idx] = (MatScalar)aa[nzcountb++]; 2544 } 2545 } 2546 /* zero out the mask elements we set */ 2547 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2548 } 2549 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2550 2551 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2552 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2553 ierr = PetscFree(aa);CHKERRQ(ierr); 2554 ierr = PetscFree(jj);CHKERRQ(ierr); 2555 ierr = PetscFree(mask);CHKERRQ(ierr); 2556 2557 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2558 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 ierr = MatView_Private(B);CHKERRQ(ierr); 2560 2561 *A = B; 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatCreateSeqBAIJ" 2567 /*@C 2568 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2569 compressed row) format. For good matrix assembly performance the 2570 user should preallocate the matrix storage by setting the parameter nz 2571 (or the array nnz). By setting these parameters accurately, performance 2572 during matrix assembly can be increased by more than a factor of 50. 2573 2574 Collective on MPI_Comm 2575 2576 Input Parameters: 2577 + comm - MPI communicator, set to PETSC_COMM_SELF 2578 . bs - size of block 2579 . m - number of rows 2580 . n - number of columns 2581 . nz - number of nonzero blocks per block row (same for all rows) 2582 - nnz - array containing the number of nonzero blocks in the various block rows 2583 (possibly different for each block row) or PETSC_NULL 2584 2585 Output Parameter: 2586 . A - the matrix 2587 2588 Options Database Keys: 2589 . -mat_no_unroll - uses code that does not unroll the loops in the 2590 block calculations (much slower) 2591 . -mat_block_size - size of the blocks to use 2592 2593 Level: intermediate 2594 2595 Notes: 2596 A nonzero block is any block that as 1 or more nonzeros in it 2597 2598 The block AIJ format is fully compatible with standard Fortran 77 2599 storage. That is, the stored row and column indices can begin at 2600 either one (as in Fortran) or zero. See the users' manual for details. 2601 2602 Specify the preallocated storage with either nz or nnz (not both). 2603 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2604 allocation. For additional details, see the users manual chapter on 2605 matrices. 2606 2607 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2608 @*/ 2609 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2610 { 2611 int ierr; 2612 2613 PetscFunctionBegin; 2614 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2615 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2616 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 #undef __FUNCT__ 2621 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2622 /*@C 2623 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2624 per row in the matrix. For good matrix assembly performance the 2625 user should preallocate the matrix storage by setting the parameter nz 2626 (or the array nnz). By setting these parameters accurately, performance 2627 during matrix assembly can be increased by more than a factor of 50. 2628 2629 Collective on MPI_Comm 2630 2631 Input Parameters: 2632 + A - the matrix 2633 . bs - size of block 2634 . nz - number of block nonzeros per block row (same for all rows) 2635 - nnz - array containing the number of block nonzeros in the various block rows 2636 (possibly different for each block row) or PETSC_NULL 2637 2638 Options Database Keys: 2639 . -mat_no_unroll - uses code that does not unroll the loops in the 2640 block calculations (much slower) 2641 . -mat_block_size - size of the blocks to use 2642 2643 Level: intermediate 2644 2645 Notes: 2646 The block AIJ format is fully compatible with standard Fortran 77 2647 storage. That is, the stored row and column indices can begin at 2648 either one (as in Fortran) or zero. See the users' manual for details. 2649 2650 Specify the preallocated storage with either nz or nnz (not both). 2651 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2652 allocation. For additional details, see the users manual chapter on 2653 matrices. 2654 2655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2656 @*/ 2657 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2658 { 2659 int ierr,(*f)(Mat,int,int,const int[]); 2660 2661 PetscFunctionBegin; 2662 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2663 if (f) { 2664 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2665 } 2666 PetscFunctionReturn(0); 2667 } 2668 2669