1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 15 int MatInvertBlockDiagonal_SeqBAIJ(Mat A) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 int *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr; 19 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) PetscFunctionReturn(0); 23 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 24 diag_offset = a->diag; 25 if (!a->idiag) { 26 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 27 } 28 diag = a->idiag; 29 mdiag = a->idiag+bs*bs*mbs; 30 /* factor and invert each block */ 31 switch (a->bs){ 32 case 2: 33 for (i=0; i<mbs; i++) { 34 odiag = v + 4*diag_offset[i]; 35 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 36 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 37 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 38 diag += 4; 39 mdiag += 4; 40 } 41 break; 42 case 3: 43 for (i=0; i<mbs; i++) { 44 odiag = v + 9*diag_offset[i]; 45 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 46 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 47 diag[8] = odiag[8]; 48 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 49 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 50 mdiag[8] = odiag[8]; 51 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 52 diag += 9; 53 mdiag += 9; 54 } 55 break; 56 case 4: 57 for (i=0; i<mbs; i++) { 58 odiag = v + 16*diag_offset[i]; 59 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 61 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 62 diag += 16; 63 mdiag += 16; 64 } 65 break; 66 case 5: 67 for (i=0; i<mbs; i++) { 68 odiag = v + 25*diag_offset[i]; 69 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 71 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 72 diag += 25; 73 mdiag += 25; 74 } 75 break; 76 default: 77 SETERRQ1(1,"not supported for block size %d",a->bs); 78 } 79 a->idiagvalid = PETSC_TRUE; 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 85 int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 88 PetscScalar *x,x1,x2,s1,s2; 89 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 90 int ierr,m = a->mbs,i,i2,nz,idx; 91 const int *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 int ierr,m = a->mbs,i,i2,nz,idx; 192 const int *diag,*ai = a->i,*aj = a->j,*vi; 193 194 PetscFunctionBegin; 195 its = its*lits; 196 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 197 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 198 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 199 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 200 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 201 202 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 203 204 diag = a->diag; 205 idiag = a->idiag; 206 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 207 ierr = VecGetArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 208 209 if (flag & SOR_ZERO_INITIAL_GUESS) { 210 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 211 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 212 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 213 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 214 i2 = 3; 215 idiag += 9; 216 for (i=1; i<m; i++) { 217 v = aa + 9*ai[i]; 218 vi = aj + ai[i]; 219 nz = diag[i] - ai[i]; 220 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 221 while (nz--) { 222 idx = 3*(*vi++); 223 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 224 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 225 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 226 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 227 v += 9; 228 } 229 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 230 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 231 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 232 idiag += 9; 233 i2 += 3; 234 } 235 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 236 PetscLogFlops(9*(a->nz)); 237 } 238 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 239 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 240 i2 = 0; 241 mdiag = a->idiag+9*a->mbs; 242 for (i=0; i<m; i++) { 243 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 244 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 245 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 246 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 247 mdiag += 9; 248 i2 += 3; 249 } 250 PetscLogFlops(15*m); 251 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 252 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 255 idiag = a->idiag+9*a->mbs - 9; 256 i2 = 3*m - 3; 257 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 258 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 259 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 260 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 261 idiag -= 9; 262 i2 -= 3; 263 for (i=m-2; i>=0; i--) { 264 v = aa + 9*(diag[i]+1); 265 vi = aj + diag[i] + 1; 266 nz = ai[i+1] - diag[i] - 1; 267 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 268 while (nz--) { 269 idx = 3*(*vi++); 270 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 271 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 272 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 273 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 274 v += 9; 275 } 276 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 277 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 278 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 279 idiag -= 9; 280 i2 -= 3; 281 } 282 PetscLogFlops(9*(a->nz)); 283 } 284 } else { 285 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 286 } 287 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 288 ierr = VecRestoreArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 294 int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 295 { 296 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 297 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 298 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 299 int ierr,m = a->mbs,i,i2,nz,idx; 300 const int *diag,*ai = a->i,*aj = a->j,*vi; 301 302 PetscFunctionBegin; 303 its = its*lits; 304 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 305 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 306 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 307 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 308 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 309 310 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 311 312 diag = a->diag; 313 idiag = a->idiag; 314 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 315 ierr = VecGetArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 316 317 if (flag & SOR_ZERO_INITIAL_GUESS) { 318 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 319 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 320 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 321 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 322 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 323 i2 = 4; 324 idiag += 16; 325 for (i=1; i<m; i++) { 326 v = aa + 16*ai[i]; 327 vi = aj + ai[i]; 328 nz = diag[i] - ai[i]; 329 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 330 while (nz--) { 331 idx = 4*(*vi++); 332 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 333 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 334 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 335 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 336 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 337 v += 16; 338 } 339 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 340 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 341 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 342 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 343 idiag += 16; 344 i2 += 4; 345 } 346 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 347 PetscLogFlops(16*(a->nz)); 348 } 349 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 350 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 351 i2 = 0; 352 mdiag = a->idiag+16*a->mbs; 353 for (i=0; i<m; i++) { 354 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 355 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 356 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 357 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 358 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 359 mdiag += 16; 360 i2 += 4; 361 } 362 PetscLogFlops(28*m); 363 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 364 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 365 } 366 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 367 idiag = a->idiag+16*a->mbs - 16; 368 i2 = 4*m - 4; 369 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 370 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 371 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 372 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 373 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 374 idiag -= 16; 375 i2 -= 4; 376 for (i=m-2; i>=0; i--) { 377 v = aa + 16*(diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 381 while (nz--) { 382 idx = 4*(*vi++); 383 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 384 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 385 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 386 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 387 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 388 v += 16; 389 } 390 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 391 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 392 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 393 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 394 idiag -= 16; 395 i2 -= 4; 396 } 397 PetscLogFlops(16*(a->nz)); 398 } 399 } else { 400 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 401 } 402 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 403 ierr = VecRestoreArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 409 int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 410 { 411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 412 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 413 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 414 int ierr,m = a->mbs,i,i2,nz,idx; 415 const int *diag,*ai = a->i,*aj = a->j,*vi; 416 417 PetscFunctionBegin; 418 its = its*lits; 419 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 420 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 421 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 422 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 423 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 424 425 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 426 427 diag = a->diag; 428 idiag = a->idiag; 429 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 430 ierr = VecGetArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 431 432 if (flag & SOR_ZERO_INITIAL_GUESS) { 433 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 434 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 435 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 436 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 437 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 438 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 439 i2 = 5; 440 idiag += 25; 441 for (i=1; i<m; i++) { 442 v = aa + 25*ai[i]; 443 vi = aj + ai[i]; 444 nz = diag[i] - ai[i]; 445 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 446 while (nz--) { 447 idx = 5*(*vi++); 448 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 449 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 450 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 451 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 452 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 453 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 454 v += 25; 455 } 456 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 457 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 458 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 459 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 460 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 461 idiag += 25; 462 i2 += 5; 463 } 464 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 465 PetscLogFlops(25*(a->nz)); 466 } 467 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 468 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 469 i2 = 0; 470 mdiag = a->idiag+25*a->mbs; 471 for (i=0; i<m; i++) { 472 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 473 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 474 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 475 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 476 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 477 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 478 mdiag += 25; 479 i2 += 5; 480 } 481 PetscLogFlops(45*m); 482 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 483 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 484 } 485 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 486 idiag = a->idiag+25*a->mbs - 25; 487 i2 = 5*m - 5; 488 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 489 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 490 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 491 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 492 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 493 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 494 idiag -= 25; 495 i2 -= 5; 496 for (i=m-2; i>=0; i--) { 497 v = aa + 25*(diag[i]+1); 498 vi = aj + diag[i] + 1; 499 nz = ai[i+1] - diag[i] - 1; 500 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 501 while (nz--) { 502 idx = 5*(*vi++); 503 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 504 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 505 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 506 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 507 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 508 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 509 v += 25; 510 } 511 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 512 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 513 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 514 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 515 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 516 idiag -= 25; 517 i2 -= 5; 518 } 519 PetscLogFlops(25*(a->nz)); 520 } 521 } else { 522 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 523 } 524 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 525 ierr = VecRestoreArrayFast(bb,(PetscScalar**)&b);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 /* 530 Special version for Fun3d sequential benchmark 531 */ 532 #if defined(PETSC_HAVE_FORTRAN_CAPS) 533 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 534 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 535 #define matsetvaluesblocked4_ matsetvaluesblocked4 536 #endif 537 538 EXTERN_C_BEGIN 539 #undef __FUNCT__ 540 #define __FUNCT__ "matsetvaluesblocked4_" 541 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 542 { 543 Mat A = *AA; 544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 545 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 546 int *ai=a->i,*ailen=a->ilen; 547 int *aj=a->j,stepval; 548 const PetscScalar *value = v; 549 MatScalar *ap,*aa = a->a,*bap; 550 551 PetscFunctionBegin; 552 stepval = (n-1)*4; 553 for (k=0; k<m; k++) { /* loop over added rows */ 554 row = im[k]; 555 rp = aj + ai[row]; 556 ap = aa + 16*ai[row]; 557 nrow = ailen[row]; 558 low = 0; 559 for (l=0; l<n; l++) { /* loop over added columns */ 560 col = in[l]; 561 value = v + k*(stepval+4)*4 + l*4; 562 low = 0; high = nrow; 563 while (high-low > 7) { 564 t = (low+high)/2; 565 if (rp[t] > col) high = t; 566 else low = t; 567 } 568 for (i=low; i<high; i++) { 569 if (rp[i] > col) break; 570 if (rp[i] == col) { 571 bap = ap + 16*i; 572 for (ii=0; ii<4; ii++,value+=stepval) { 573 for (jj=ii; jj<16; jj+=4) { 574 bap[jj] += *value++; 575 } 576 } 577 goto noinsert2; 578 } 579 } 580 N = nrow++ - 1; 581 /* shift up all the later entries in this row */ 582 for (ii=N; ii>=i; ii--) { 583 rp[ii+1] = rp[ii]; 584 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 585 } 586 if (N >= i) { 587 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 588 } 589 rp[i] = col; 590 bap = ap + 16*i; 591 for (ii=0; ii<4; ii++,value+=stepval) { 592 for (jj=ii; jj<16; jj+=4) { 593 bap[jj] = *value++; 594 } 595 } 596 noinsert2:; 597 low = i; 598 } 599 ailen[row] = nrow; 600 } 601 } 602 EXTERN_C_END 603 604 #if defined(PETSC_HAVE_FORTRAN_CAPS) 605 #define matsetvalues4_ MATSETVALUES4 606 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 607 #define matsetvalues4_ matsetvalues4 608 #endif 609 610 EXTERN_C_BEGIN 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatSetValues4_" 613 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 614 { 615 Mat A = *AA; 616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 617 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 618 int *ai=a->i,*ailen=a->ilen; 619 int *aj=a->j,brow,bcol; 620 int ridx,cidx; 621 MatScalar *ap,value,*aa=a->a,*bap; 622 623 PetscFunctionBegin; 624 for (k=0; k<m; k++) { /* loop over added rows */ 625 row = im[k]; brow = row/4; 626 rp = aj + ai[brow]; 627 ap = aa + 16*ai[brow]; 628 nrow = ailen[brow]; 629 low = 0; 630 for (l=0; l<n; l++) { /* loop over added columns */ 631 col = in[l]; bcol = col/4; 632 ridx = row % 4; cidx = col % 4; 633 value = v[l + k*n]; 634 low = 0; high = nrow; 635 while (high-low > 7) { 636 t = (low+high)/2; 637 if (rp[t] > bcol) high = t; 638 else low = t; 639 } 640 for (i=low; i<high; i++) { 641 if (rp[i] > bcol) break; 642 if (rp[i] == bcol) { 643 bap = ap + 16*i + 4*cidx + ridx; 644 *bap += value; 645 goto noinsert1; 646 } 647 } 648 N = nrow++ - 1; 649 /* shift up all the later entries in this row */ 650 for (ii=N; ii>=i; ii--) { 651 rp[ii+1] = rp[ii]; 652 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 653 } 654 if (N>=i) { 655 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 656 } 657 rp[i] = bcol; 658 ap[16*i + 4*cidx + ridx] = value; 659 noinsert1:; 660 low = i; 661 } 662 ailen[brow] = nrow; 663 } 664 } 665 EXTERN_C_END 666 667 /* UGLY, ugly, ugly 668 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 669 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 670 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 671 converts the entries into single precision and then calls ..._MatScalar() to put them 672 into the single precision data structures. 673 */ 674 #if defined(PETSC_USE_MAT_SINGLE) 675 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 676 #else 677 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 678 #endif 679 680 #define CHUNKSIZE 10 681 682 /* 683 Checks for missing diagonals 684 */ 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 687 int MatMissingDiagonal_SeqBAIJ(Mat A) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 int *diag,*jj = a->j,i,ierr; 691 692 PetscFunctionBegin; 693 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 694 diag = a->diag; 695 for (i=0; i<a->mbs; i++) { 696 if (jj[diag[i]] != i) { 697 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 698 } 699 } 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 705 int MatMarkDiagonal_SeqBAIJ(Mat A) 706 { 707 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 708 int i,j,*diag,m = a->mbs,ierr; 709 710 PetscFunctionBegin; 711 if (a->diag) PetscFunctionReturn(0); 712 713 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 714 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 715 for (i=0; i<m; i++) { 716 diag[i] = a->i[i+1]; 717 for (j=a->i[i]; j<a->i[i+1]; j++) { 718 if (a->j[j] == i) { 719 diag[i] = j; 720 break; 721 } 722 } 723 } 724 a->diag = diag; 725 PetscFunctionReturn(0); 726 } 727 728 729 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 733 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 int ierr,n = a->mbs,i; 737 738 PetscFunctionBegin; 739 *nn = n; 740 if (!ia) PetscFunctionReturn(0); 741 if (symmetric) { 742 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 743 } else if (oshift == 1) { 744 /* temporarily add 1 to i and j indices */ 745 int nz = a->i[n]; 746 for (i=0; i<nz; i++) a->j[i]++; 747 for (i=0; i<n+1; i++) a->i[i]++; 748 *ia = a->i; *ja = a->j; 749 } else { 750 *ia = a->i; *ja = a->j; 751 } 752 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 758 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 759 { 760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 761 int i,n = a->mbs,ierr; 762 763 PetscFunctionBegin; 764 if (!ia) PetscFunctionReturn(0); 765 if (symmetric) { 766 ierr = PetscFree(*ia);CHKERRQ(ierr); 767 ierr = PetscFree(*ja);CHKERRQ(ierr); 768 } else if (oshift == 1) { 769 int nz = a->i[n]-1; 770 for (i=0; i<nz; i++) a->j[i]--; 771 for (i=0; i<n+1; i++) a->i[i]--; 772 } 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 778 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 779 { 780 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 781 782 PetscFunctionBegin; 783 *bs = baij->bs; 784 PetscFunctionReturn(0); 785 } 786 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatDestroy_SeqBAIJ" 790 int MatDestroy_SeqBAIJ(Mat A) 791 { 792 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 793 int ierr; 794 795 PetscFunctionBegin; 796 #if defined(PETSC_USE_LOG) 797 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 798 #endif 799 ierr = PetscFree(a->a);CHKERRQ(ierr); 800 if (!a->singlemalloc) { 801 ierr = PetscFree(a->i);CHKERRQ(ierr); 802 ierr = PetscFree(a->j);CHKERRQ(ierr); 803 } 804 if (a->row) { 805 ierr = ISDestroy(a->row);CHKERRQ(ierr); 806 } 807 if (a->col) { 808 ierr = ISDestroy(a->col);CHKERRQ(ierr); 809 } 810 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 811 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 812 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 813 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 814 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 815 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 816 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 817 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 818 #if defined(PETSC_USE_MAT_SINGLE) 819 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 820 #endif 821 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 822 823 ierr = PetscFree(a);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatSetOption_SeqBAIJ" 829 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 830 { 831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 832 833 PetscFunctionBegin; 834 switch (op) { 835 case MAT_ROW_ORIENTED: 836 a->roworiented = PETSC_TRUE; 837 break; 838 case MAT_COLUMN_ORIENTED: 839 a->roworiented = PETSC_FALSE; 840 break; 841 case MAT_COLUMNS_SORTED: 842 a->sorted = PETSC_TRUE; 843 break; 844 case MAT_COLUMNS_UNSORTED: 845 a->sorted = PETSC_FALSE; 846 break; 847 case MAT_KEEP_ZEROED_ROWS: 848 a->keepzeroedrows = PETSC_TRUE; 849 break; 850 case MAT_NO_NEW_NONZERO_LOCATIONS: 851 a->nonew = 1; 852 break; 853 case MAT_NEW_NONZERO_LOCATION_ERR: 854 a->nonew = -1; 855 break; 856 case MAT_NEW_NONZERO_ALLOCATION_ERR: 857 a->nonew = -2; 858 break; 859 case MAT_YES_NEW_NONZERO_LOCATIONS: 860 a->nonew = 0; 861 break; 862 case MAT_ROWS_SORTED: 863 case MAT_ROWS_UNSORTED: 864 case MAT_YES_NEW_DIAGONALS: 865 case MAT_IGNORE_OFF_PROC_ENTRIES: 866 case MAT_USE_HASH_TABLE: 867 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 868 break; 869 case MAT_NO_NEW_DIAGONALS: 870 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 871 case MAT_SYMMETRIC: 872 case MAT_STRUCTURALLY_SYMMETRIC: 873 case MAT_NOT_SYMMETRIC: 874 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 875 case MAT_HERMITIAN: 876 case MAT_NOT_HERMITIAN: 877 case MAT_SYMMETRY_ETERNAL: 878 case MAT_NOT_SYMMETRY_ETERNAL: 879 break; 880 default: 881 SETERRQ(PETSC_ERR_SUP,"unknown option"); 882 } 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatGetRow_SeqBAIJ" 888 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 889 { 890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 891 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 892 MatScalar *aa,*aa_i; 893 PetscScalar *v_i; 894 895 PetscFunctionBegin; 896 bs = a->bs; 897 ai = a->i; 898 aj = a->j; 899 aa = a->a; 900 bs2 = a->bs2; 901 902 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 903 904 bn = row/bs; /* Block number */ 905 bp = row % bs; /* Block Position */ 906 M = ai[bn+1] - ai[bn]; 907 *nz = bs*M; 908 909 if (v) { 910 *v = 0; 911 if (*nz) { 912 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 913 for (i=0; i<M; i++) { /* for each block in the block row */ 914 v_i = *v + i*bs; 915 aa_i = aa + bs2*(ai[bn] + i); 916 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 917 } 918 } 919 } 920 921 if (idx) { 922 *idx = 0; 923 if (*nz) { 924 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 925 for (i=0; i<M; i++) { /* for each block in the block row */ 926 idx_i = *idx + i*bs; 927 itmp = bs*aj[ai[bn] + i]; 928 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 929 } 930 } 931 } 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 937 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 938 { 939 int ierr; 940 941 PetscFunctionBegin; 942 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 943 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatTranspose_SeqBAIJ" 949 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 950 { 951 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 952 Mat C; 953 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 954 int *rows,*cols,bs2=a->bs2; 955 PetscScalar *array; 956 957 PetscFunctionBegin; 958 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 959 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 960 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 961 962 #if defined(PETSC_USE_MAT_SINGLE) 963 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 964 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 965 #else 966 array = a->a; 967 #endif 968 969 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 970 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 971 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 972 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 973 ierr = PetscFree(col);CHKERRQ(ierr); 974 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 975 cols = rows + bs; 976 for (i=0; i<mbs; i++) { 977 cols[0] = i*bs; 978 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 979 len = ai[i+1] - ai[i]; 980 for (j=0; j<len; j++) { 981 rows[0] = (*aj++)*bs; 982 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 983 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 984 array += bs2; 985 } 986 } 987 ierr = PetscFree(rows);CHKERRQ(ierr); 988 #if defined(PETSC_USE_MAT_SINGLE) 989 ierr = PetscFree(array); 990 #endif 991 992 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 993 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 994 995 if (B) { 996 *B = C; 997 } else { 998 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1005 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1006 { 1007 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1008 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1009 PetscScalar *aa; 1010 FILE *file; 1011 1012 PetscFunctionBegin; 1013 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1014 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 1015 col_lens[0] = MAT_FILE_COOKIE; 1016 1017 col_lens[1] = A->m; 1018 col_lens[2] = A->n; 1019 col_lens[3] = a->nz*bs2; 1020 1021 /* store lengths of each row and write (including header) to file */ 1022 count = 0; 1023 for (i=0; i<a->mbs; i++) { 1024 for (j=0; j<bs; j++) { 1025 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1026 } 1027 } 1028 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 1029 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1030 1031 /* store column indices (zero start index) */ 1032 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 1033 count = 0; 1034 for (i=0; i<a->mbs; i++) { 1035 for (j=0; j<bs; j++) { 1036 for (k=a->i[i]; k<a->i[i+1]; k++) { 1037 for (l=0; l<bs; l++) { 1038 jj[count++] = bs*a->j[k] + l; 1039 } 1040 } 1041 } 1042 } 1043 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 1044 ierr = PetscFree(jj);CHKERRQ(ierr); 1045 1046 /* store nonzero values */ 1047 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1048 count = 0; 1049 for (i=0; i<a->mbs; i++) { 1050 for (j=0; j<bs; j++) { 1051 for (k=a->i[i]; k<a->i[i+1]; k++) { 1052 for (l=0; l<bs; l++) { 1053 aa[count++] = a->a[bs2*k + l*bs + j]; 1054 } 1055 } 1056 } 1057 } 1058 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 1059 ierr = PetscFree(aa);CHKERRQ(ierr); 1060 1061 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1062 if (file) { 1063 fprintf(file,"-matload_block_size %d\n",a->bs); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1070 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1071 { 1072 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1073 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 1074 PetscViewerFormat format; 1075 1076 PetscFunctionBegin; 1077 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1078 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1079 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1080 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1081 Mat aij; 1082 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1083 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1084 ierr = MatDestroy(aij);CHKERRQ(ierr); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1088 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1089 for (i=0; i<a->mbs; i++) { 1090 for (j=0; j<bs; j++) { 1091 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1092 for (k=a->i[i]; k<a->i[i+1]; k++) { 1093 for (l=0; l<bs; l++) { 1094 #if defined(PETSC_USE_COMPLEX) 1095 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1096 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 1097 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1098 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1099 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 1100 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1101 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1102 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1103 } 1104 #else 1105 if (a->a[bs2*k + l*bs + j] != 0.0) { 1106 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1107 } 1108 #endif 1109 } 1110 } 1111 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1112 } 1113 } 1114 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1115 } else { 1116 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1117 for (i=0; i<a->mbs; i++) { 1118 for (j=0; j<bs; j++) { 1119 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1120 for (k=a->i[i]; k<a->i[i+1]; k++) { 1121 for (l=0; l<bs; l++) { 1122 #if defined(PETSC_USE_COMPLEX) 1123 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1124 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 1125 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1126 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1127 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 1128 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1129 } else { 1130 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1131 } 1132 #else 1133 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1134 #endif 1135 } 1136 } 1137 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1138 } 1139 } 1140 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1141 } 1142 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1148 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1149 { 1150 Mat A = (Mat) Aa; 1151 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1152 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1153 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1154 MatScalar *aa; 1155 PetscViewer viewer; 1156 1157 PetscFunctionBegin; 1158 1159 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1160 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1161 1162 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1163 1164 /* loop over matrix elements drawing boxes */ 1165 color = PETSC_DRAW_BLUE; 1166 for (i=0,row=0; i<mbs; i++,row+=bs) { 1167 for (j=a->i[i]; j<a->i[i+1]; j++) { 1168 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1169 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1170 aa = a->a + j*bs2; 1171 for (k=0; k<bs; k++) { 1172 for (l=0; l<bs; l++) { 1173 if (PetscRealPart(*aa++) >= 0.) continue; 1174 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1175 } 1176 } 1177 } 1178 } 1179 color = PETSC_DRAW_CYAN; 1180 for (i=0,row=0; i<mbs; i++,row+=bs) { 1181 for (j=a->i[i]; j<a->i[i+1]; j++) { 1182 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1183 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1184 aa = a->a + j*bs2; 1185 for (k=0; k<bs; k++) { 1186 for (l=0; l<bs; l++) { 1187 if (PetscRealPart(*aa++) != 0.) continue; 1188 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1189 } 1190 } 1191 } 1192 } 1193 1194 color = PETSC_DRAW_RED; 1195 for (i=0,row=0; i<mbs; i++,row+=bs) { 1196 for (j=a->i[i]; j<a->i[i+1]; j++) { 1197 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1198 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1199 aa = a->a + j*bs2; 1200 for (k=0; k<bs; k++) { 1201 for (l=0; l<bs; l++) { 1202 if (PetscRealPart(*aa++) <= 0.) continue; 1203 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1204 } 1205 } 1206 } 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1213 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1214 { 1215 int ierr; 1216 PetscReal xl,yl,xr,yr,w,h; 1217 PetscDraw draw; 1218 PetscTruth isnull; 1219 1220 PetscFunctionBegin; 1221 1222 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1223 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1224 1225 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1226 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1227 xr += w; yr += h; xl = -w; yl = -h; 1228 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1229 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1230 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatView_SeqBAIJ" 1236 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1237 { 1238 int ierr; 1239 PetscTruth isascii,isbinary,isdraw; 1240 1241 PetscFunctionBegin; 1242 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1243 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1244 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1245 if (isascii){ 1246 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1247 } else if (isbinary) { 1248 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1249 } else if (isdraw) { 1250 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1251 } else { 1252 Mat B; 1253 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1254 ierr = MatView(B,viewer);CHKERRQ(ierr); 1255 ierr = MatDestroy(B);CHKERRQ(ierr); 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1263 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 1264 { 1265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1266 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1267 int *ai = a->i,*ailen = a->ilen; 1268 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1269 MatScalar *ap,*aa = a->a,zero = 0.0; 1270 1271 PetscFunctionBegin; 1272 for (k=0; k<m; k++) { /* loop over rows */ 1273 row = im[k]; brow = row/bs; 1274 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1275 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 1276 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1277 nrow = ailen[brow]; 1278 for (l=0; l<n; l++) { /* loop over columns */ 1279 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1280 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 1281 col = in[l] ; 1282 bcol = col/bs; 1283 cidx = col%bs; 1284 ridx = row%bs; 1285 high = nrow; 1286 low = 0; /* assume unsorted */ 1287 while (high-low > 5) { 1288 t = (low+high)/2; 1289 if (rp[t] > bcol) high = t; 1290 else low = t; 1291 } 1292 for (i=low; i<high; i++) { 1293 if (rp[i] > bcol) break; 1294 if (rp[i] == bcol) { 1295 *v++ = ap[bs2*i+bs*cidx+ridx]; 1296 goto finished; 1297 } 1298 } 1299 *v++ = zero; 1300 finished:; 1301 } 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #if defined(PETSC_USE_MAT_SINGLE) 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1309 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 1310 { 1311 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1312 int ierr,i,N = m*n*b->bs2; 1313 MatScalar *vsingle; 1314 1315 PetscFunctionBegin; 1316 if (N > b->setvalueslen) { 1317 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1318 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1319 b->setvalueslen = N; 1320 } 1321 vsingle = b->setvaluescopy; 1322 for (i=0; i<N; i++) { 1323 vsingle[i] = v[i]; 1324 } 1325 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1326 PetscFunctionReturn(0); 1327 } 1328 #endif 1329 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1333 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 1334 { 1335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1336 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1337 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1338 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 1339 PetscTruth roworiented=a->roworiented; 1340 const MatScalar *value = v; 1341 MatScalar *ap,*aa = a->a,*bap; 1342 1343 PetscFunctionBegin; 1344 if (roworiented) { 1345 stepval = (n-1)*bs; 1346 } else { 1347 stepval = (m-1)*bs; 1348 } 1349 for (k=0; k<m; k++) { /* loop over added rows */ 1350 row = im[k]; 1351 if (row < 0) continue; 1352 #if defined(PETSC_USE_BOPT_g) 1353 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 1354 #endif 1355 rp = aj + ai[row]; 1356 ap = aa + bs2*ai[row]; 1357 rmax = imax[row]; 1358 nrow = ailen[row]; 1359 low = 0; 1360 for (l=0; l<n; l++) { /* loop over added columns */ 1361 if (in[l] < 0) continue; 1362 #if defined(PETSC_USE_BOPT_g) 1363 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 1364 #endif 1365 col = in[l]; 1366 if (roworiented) { 1367 value = v + k*(stepval+bs)*bs + l*bs; 1368 } else { 1369 value = v + l*(stepval+bs)*bs + k*bs; 1370 } 1371 if (!sorted) low = 0; high = nrow; 1372 while (high-low > 7) { 1373 t = (low+high)/2; 1374 if (rp[t] > col) high = t; 1375 else low = t; 1376 } 1377 for (i=low; i<high; i++) { 1378 if (rp[i] > col) break; 1379 if (rp[i] == col) { 1380 bap = ap + bs2*i; 1381 if (roworiented) { 1382 if (is == ADD_VALUES) { 1383 for (ii=0; ii<bs; ii++,value+=stepval) { 1384 for (jj=ii; jj<bs2; jj+=bs) { 1385 bap[jj] += *value++; 1386 } 1387 } 1388 } else { 1389 for (ii=0; ii<bs; ii++,value+=stepval) { 1390 for (jj=ii; jj<bs2; jj+=bs) { 1391 bap[jj] = *value++; 1392 } 1393 } 1394 } 1395 } else { 1396 if (is == ADD_VALUES) { 1397 for (ii=0; ii<bs; ii++,value+=stepval) { 1398 for (jj=0; jj<bs; jj++) { 1399 *bap++ += *value++; 1400 } 1401 } 1402 } else { 1403 for (ii=0; ii<bs; ii++,value+=stepval) { 1404 for (jj=0; jj<bs; jj++) { 1405 *bap++ = *value++; 1406 } 1407 } 1408 } 1409 } 1410 goto noinsert2; 1411 } 1412 } 1413 if (nonew == 1) goto noinsert2; 1414 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1415 if (nrow >= rmax) { 1416 /* there is no extra room in row, therefore enlarge */ 1417 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1418 MatScalar *new_a; 1419 1420 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1421 1422 /* malloc new storage space */ 1423 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1424 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1425 new_j = (int*)(new_a + bs2*new_nz); 1426 new_i = new_j + new_nz; 1427 1428 /* copy over old data into new slots */ 1429 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1430 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1431 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 1432 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1433 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 1434 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1435 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1436 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1437 /* free up old matrix storage */ 1438 ierr = PetscFree(a->a);CHKERRQ(ierr); 1439 if (!a->singlemalloc) { 1440 ierr = PetscFree(a->i);CHKERRQ(ierr); 1441 ierr = PetscFree(a->j);CHKERRQ(ierr); 1442 } 1443 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1444 a->singlemalloc = PETSC_TRUE; 1445 1446 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1447 rmax = imax[row] = imax[row] + CHUNKSIZE; 1448 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1449 a->maxnz += bs2*CHUNKSIZE; 1450 a->reallocs++; 1451 a->nz++; 1452 } 1453 N = nrow++ - 1; 1454 /* shift up all the later entries in this row */ 1455 for (ii=N; ii>=i; ii--) { 1456 rp[ii+1] = rp[ii]; 1457 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1458 } 1459 if (N >= i) { 1460 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1461 } 1462 rp[i] = col; 1463 bap = ap + bs2*i; 1464 if (roworiented) { 1465 for (ii=0; ii<bs; ii++,value+=stepval) { 1466 for (jj=ii; jj<bs2; jj+=bs) { 1467 bap[jj] = *value++; 1468 } 1469 } 1470 } else { 1471 for (ii=0; ii<bs; ii++,value+=stepval) { 1472 for (jj=0; jj<bs; jj++) { 1473 *bap++ = *value++; 1474 } 1475 } 1476 } 1477 noinsert2:; 1478 low = i; 1479 } 1480 ailen[row] = nrow; 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1487 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1488 { 1489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1490 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1491 int m = A->m,*ip,N,*ailen = a->ilen; 1492 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 1493 MatScalar *aa = a->a,*ap; 1494 1495 PetscFunctionBegin; 1496 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1497 1498 if (m) rmax = ailen[0]; 1499 for (i=1; i<mbs; i++) { 1500 /* move each row back by the amount of empty slots (fshift) before it*/ 1501 fshift += imax[i-1] - ailen[i-1]; 1502 rmax = PetscMax(rmax,ailen[i]); 1503 if (fshift) { 1504 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1505 N = ailen[i]; 1506 for (j=0; j<N; j++) { 1507 ip[j-fshift] = ip[j]; 1508 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1509 } 1510 } 1511 ai[i] = ai[i-1] + ailen[i-1]; 1512 } 1513 if (mbs) { 1514 fshift += imax[mbs-1] - ailen[mbs-1]; 1515 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1516 } 1517 /* reset ilen and imax for each row */ 1518 for (i=0; i<mbs; i++) { 1519 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1520 } 1521 a->nz = ai[mbs]; 1522 1523 /* diagonals may have moved, so kill the diagonal pointers */ 1524 a->idiagvalid = PETSC_FALSE; 1525 if (fshift && a->diag) { 1526 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1527 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1528 a->diag = 0; 1529 } 1530 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1531 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1532 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1533 a->reallocs = 0; 1534 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1535 1536 PetscFunctionReturn(0); 1537 } 1538 1539 1540 1541 /* 1542 This function returns an array of flags which indicate the locations of contiguous 1543 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1544 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1545 Assume: sizes should be long enough to hold all the values. 1546 */ 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1549 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1550 { 1551 int i,j,k,row; 1552 PetscTruth flg; 1553 1554 PetscFunctionBegin; 1555 for (i=0,j=0; i<n; j++) { 1556 row = idx[i]; 1557 if (row%bs!=0) { /* Not the begining of a block */ 1558 sizes[j] = 1; 1559 i++; 1560 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1561 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1562 i++; 1563 } else { /* Begining of the block, so check if the complete block exists */ 1564 flg = PETSC_TRUE; 1565 for (k=1; k<bs; k++) { 1566 if (row+k != idx[i+k]) { /* break in the block */ 1567 flg = PETSC_FALSE; 1568 break; 1569 } 1570 } 1571 if (flg == PETSC_TRUE) { /* No break in the bs */ 1572 sizes[j] = bs; 1573 i+= bs; 1574 } else { 1575 sizes[j] = 1; 1576 i++; 1577 } 1578 } 1579 } 1580 *bs_max = j; 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1586 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1587 { 1588 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1589 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1590 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1591 PetscScalar zero = 0.0; 1592 MatScalar *aa; 1593 1594 PetscFunctionBegin; 1595 /* Make a copy of the IS and sort it */ 1596 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1597 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1598 1599 /* allocate memory for rows,sizes */ 1600 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1601 sizes = rows + is_n; 1602 1603 /* copy IS values to rows, and sort them */ 1604 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1605 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1606 if (baij->keepzeroedrows) { 1607 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1608 bs_max = is_n; 1609 } else { 1610 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1611 } 1612 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1613 1614 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1615 row = rows[j]; 1616 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1617 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1618 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1619 if (sizes[i] == bs && !baij->keepzeroedrows) { 1620 if (diag) { 1621 if (baij->ilen[row/bs] > 0) { 1622 baij->ilen[row/bs] = 1; 1623 baij->j[baij->i[row/bs]] = row/bs; 1624 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1625 } 1626 /* Now insert all the diagonal values for this bs */ 1627 for (k=0; k<bs; k++) { 1628 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1629 } 1630 } else { /* (!diag) */ 1631 baij->ilen[row/bs] = 0; 1632 } /* end (!diag) */ 1633 } else { /* (sizes[i] != bs) */ 1634 #if defined (PETSC_USE_DEBUG) 1635 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1636 #endif 1637 for (k=0; k<count; k++) { 1638 aa[0] = zero; 1639 aa += bs; 1640 } 1641 if (diag) { 1642 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1643 } 1644 } 1645 } 1646 1647 ierr = PetscFree(rows);CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1654 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1655 { 1656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1657 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1658 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1659 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1660 int ridx,cidx,bs2=a->bs2,ierr; 1661 PetscTruth roworiented=a->roworiented; 1662 MatScalar *ap,value,*aa=a->a,*bap; 1663 1664 PetscFunctionBegin; 1665 for (k=0; k<m; k++) { /* loop over added rows */ 1666 row = im[k]; brow = row/bs; 1667 if (row < 0) continue; 1668 #if defined(PETSC_USE_BOPT_g) 1669 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1670 #endif 1671 rp = aj + ai[brow]; 1672 ap = aa + bs2*ai[brow]; 1673 rmax = imax[brow]; 1674 nrow = ailen[brow]; 1675 low = 0; 1676 for (l=0; l<n; l++) { /* loop over added columns */ 1677 if (in[l] < 0) continue; 1678 #if defined(PETSC_USE_BOPT_g) 1679 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1680 #endif 1681 col = in[l]; bcol = col/bs; 1682 ridx = row % bs; cidx = col % bs; 1683 if (roworiented) { 1684 value = v[l + k*n]; 1685 } else { 1686 value = v[k + l*m]; 1687 } 1688 if (!sorted) low = 0; high = nrow; 1689 while (high-low > 7) { 1690 t = (low+high)/2; 1691 if (rp[t] > bcol) high = t; 1692 else low = t; 1693 } 1694 for (i=low; i<high; i++) { 1695 if (rp[i] > bcol) break; 1696 if (rp[i] == bcol) { 1697 bap = ap + bs2*i + bs*cidx + ridx; 1698 if (is == ADD_VALUES) *bap += value; 1699 else *bap = value; 1700 goto noinsert1; 1701 } 1702 } 1703 if (nonew == 1) goto noinsert1; 1704 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1705 if (nrow >= rmax) { 1706 /* there is no extra room in row, therefore enlarge */ 1707 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1708 MatScalar *new_a; 1709 1710 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1711 1712 /* Malloc new storage space */ 1713 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1714 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1715 new_j = (int*)(new_a + bs2*new_nz); 1716 new_i = new_j + new_nz; 1717 1718 /* copy over old data into new slots */ 1719 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1720 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1721 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1722 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1723 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1724 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1725 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1726 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1727 /* free up old matrix storage */ 1728 ierr = PetscFree(a->a);CHKERRQ(ierr); 1729 if (!a->singlemalloc) { 1730 ierr = PetscFree(a->i);CHKERRQ(ierr); 1731 ierr = PetscFree(a->j);CHKERRQ(ierr); 1732 } 1733 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1734 a->singlemalloc = PETSC_TRUE; 1735 1736 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1737 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1738 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1739 a->maxnz += bs2*CHUNKSIZE; 1740 a->reallocs++; 1741 a->nz++; 1742 } 1743 N = nrow++ - 1; 1744 /* shift up all the later entries in this row */ 1745 for (ii=N; ii>=i; ii--) { 1746 rp[ii+1] = rp[ii]; 1747 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1748 } 1749 if (N>=i) { 1750 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1751 } 1752 rp[i] = bcol; 1753 ap[bs2*i + bs*cidx + ridx] = value; 1754 noinsert1:; 1755 low = i; 1756 } 1757 ailen[brow] = nrow; 1758 } 1759 PetscFunctionReturn(0); 1760 } 1761 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1765 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1766 { 1767 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1768 Mat outA; 1769 int ierr; 1770 PetscTruth row_identity,col_identity; 1771 1772 PetscFunctionBegin; 1773 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1774 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1775 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1776 if (!row_identity || !col_identity) { 1777 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1778 } 1779 1780 outA = inA; 1781 inA->factor = FACTOR_LU; 1782 1783 if (!a->diag) { 1784 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1785 } 1786 1787 a->row = row; 1788 a->col = col; 1789 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1790 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1791 1792 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1793 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1794 PetscLogObjectParent(inA,a->icol); 1795 1796 /* 1797 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1798 for ILU(0) factorization with natural ordering 1799 */ 1800 if (a->bs < 8) { 1801 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1802 } else { 1803 if (!a->solve_work) { 1804 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1805 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1806 } 1807 } 1808 1809 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1810 1811 PetscFunctionReturn(0); 1812 } 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1815 int MatPrintHelp_SeqBAIJ(Mat A) 1816 { 1817 static PetscTruth called = PETSC_FALSE; 1818 MPI_Comm comm = A->comm; 1819 int ierr; 1820 1821 PetscFunctionBegin; 1822 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1823 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1824 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 EXTERN_C_BEGIN 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1831 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1832 { 1833 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1834 int i,nz,nbs; 1835 1836 PetscFunctionBegin; 1837 nz = baij->maxnz/baij->bs2; 1838 nbs = baij->nbs; 1839 for (i=0; i<nz; i++) { 1840 baij->j[i] = indices[i]; 1841 } 1842 baij->nz = nz; 1843 for (i=0; i<nbs; i++) { 1844 baij->ilen[i] = baij->imax[i]; 1845 } 1846 1847 PetscFunctionReturn(0); 1848 } 1849 EXTERN_C_END 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1853 /*@ 1854 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1855 in the matrix. 1856 1857 Input Parameters: 1858 + mat - the SeqBAIJ matrix 1859 - indices - the column indices 1860 1861 Level: advanced 1862 1863 Notes: 1864 This can be called if you have precomputed the nonzero structure of the 1865 matrix and want to provide it to the matrix object to improve the performance 1866 of the MatSetValues() operation. 1867 1868 You MUST have set the correct numbers of nonzeros per row in the call to 1869 MatCreateSeqBAIJ(). 1870 1871 MUST be called before any calls to MatSetValues(); 1872 1873 @*/ 1874 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1875 { 1876 int ierr,(*f)(Mat,int *); 1877 1878 PetscFunctionBegin; 1879 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1880 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1881 if (f) { 1882 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1883 } else { 1884 SETERRQ(1,"Wrong type of matrix to set column indices"); 1885 } 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1891 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1892 { 1893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1894 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1895 PetscReal atmp; 1896 PetscScalar *x,zero = 0.0; 1897 MatScalar *aa; 1898 int ncols,brow,krow,kcol; 1899 1900 PetscFunctionBegin; 1901 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1902 bs = a->bs; 1903 aa = a->a; 1904 ai = a->i; 1905 aj = a->j; 1906 mbs = a->mbs; 1907 1908 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1909 ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 1910 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1911 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1912 for (i=0; i<mbs; i++) { 1913 ncols = ai[1] - ai[0]; ai++; 1914 brow = bs*i; 1915 for (j=0; j<ncols; j++){ 1916 /* bcol = bs*(*aj); */ 1917 for (kcol=0; kcol<bs; kcol++){ 1918 for (krow=0; krow<bs; krow++){ 1919 atmp = PetscAbsScalar(*aa); aa++; 1920 row = brow + krow; /* row index */ 1921 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1922 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1923 } 1924 } 1925 aj++; 1926 } 1927 } 1928 ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1934 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1935 { 1936 int ierr; 1937 1938 PetscFunctionBegin; 1939 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1945 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1946 { 1947 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1948 PetscFunctionBegin; 1949 *array = a->a; 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1955 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1956 { 1957 PetscFunctionBegin; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #include "petscblaslapack.h" 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1964 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1965 { 1966 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1967 int ierr,one=1,i,bs=y->bs,j,bs2; 1968 1969 PetscFunctionBegin; 1970 if (str == SAME_NONZERO_PATTERN) { 1971 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1972 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1973 if (y->xtoy && y->XtoY != X) { 1974 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1975 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1976 } 1977 if (!y->xtoy) { /* get xtoy */ 1978 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1979 y->XtoY = X; 1980 } 1981 bs2 = bs*bs; 1982 for (i=0; i<x->nz; i++) { 1983 j = 0; 1984 while (j < bs2){ 1985 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1986 j++; 1987 } 1988 } 1989 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1990 } else { 1991 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 /* -------------------------------------------------------------------*/ 1997 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1998 MatGetRow_SeqBAIJ, 1999 MatRestoreRow_SeqBAIJ, 2000 MatMult_SeqBAIJ_N, 2001 /* 4*/ MatMultAdd_SeqBAIJ_N, 2002 MatMultTranspose_SeqBAIJ, 2003 MatMultTransposeAdd_SeqBAIJ, 2004 MatSolve_SeqBAIJ_N, 2005 0, 2006 0, 2007 /*10*/ 0, 2008 MatLUFactor_SeqBAIJ, 2009 0, 2010 0, 2011 MatTranspose_SeqBAIJ, 2012 /*15*/ MatGetInfo_SeqBAIJ, 2013 MatEqual_SeqBAIJ, 2014 MatGetDiagonal_SeqBAIJ, 2015 MatDiagonalScale_SeqBAIJ, 2016 MatNorm_SeqBAIJ, 2017 /*20*/ 0, 2018 MatAssemblyEnd_SeqBAIJ, 2019 0, 2020 MatSetOption_SeqBAIJ, 2021 MatZeroEntries_SeqBAIJ, 2022 /*25*/ MatZeroRows_SeqBAIJ, 2023 MatLUFactorSymbolic_SeqBAIJ, 2024 MatLUFactorNumeric_SeqBAIJ_N, 2025 0, 2026 0, 2027 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2028 MatILUFactorSymbolic_SeqBAIJ, 2029 0, 2030 MatGetArray_SeqBAIJ, 2031 MatRestoreArray_SeqBAIJ, 2032 /*35*/ MatDuplicate_SeqBAIJ, 2033 0, 2034 0, 2035 MatILUFactor_SeqBAIJ, 2036 0, 2037 /*40*/ MatAXPY_SeqBAIJ, 2038 MatGetSubMatrices_SeqBAIJ, 2039 MatIncreaseOverlap_SeqBAIJ, 2040 MatGetValues_SeqBAIJ, 2041 0, 2042 /*45*/ MatPrintHelp_SeqBAIJ, 2043 MatScale_SeqBAIJ, 2044 0, 2045 0, 2046 0, 2047 /*50*/ MatGetBlockSize_SeqBAIJ, 2048 MatGetRowIJ_SeqBAIJ, 2049 MatRestoreRowIJ_SeqBAIJ, 2050 0, 2051 0, 2052 /*55*/ 0, 2053 0, 2054 0, 2055 0, 2056 MatSetValuesBlocked_SeqBAIJ, 2057 /*60*/ MatGetSubMatrix_SeqBAIJ, 2058 MatDestroy_SeqBAIJ, 2059 MatView_SeqBAIJ, 2060 MatGetPetscMaps_Petsc, 2061 0, 2062 /*65*/ 0, 2063 0, 2064 0, 2065 0, 2066 0, 2067 /*70*/ MatGetRowMax_SeqBAIJ, 2068 MatConvert_Basic, 2069 0, 2070 0, 2071 0, 2072 /*75*/ 0, 2073 0, 2074 0, 2075 0, 2076 0, 2077 /*80*/ 0, 2078 0, 2079 0, 2080 0, 2081 /*85*/ MatLoad_SeqBAIJ, 2082 0, 2083 0, 2084 0, 2085 0 2086 }; 2087 2088 EXTERN_C_BEGIN 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2091 int MatStoreValues_SeqBAIJ(Mat mat) 2092 { 2093 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2094 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 2095 int ierr; 2096 2097 PetscFunctionBegin; 2098 if (aij->nonew != 1) { 2099 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2100 } 2101 2102 /* allocate space for values if not already there */ 2103 if (!aij->saved_values) { 2104 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2105 } 2106 2107 /* copy values over */ 2108 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2109 PetscFunctionReturn(0); 2110 } 2111 EXTERN_C_END 2112 2113 EXTERN_C_BEGIN 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2116 int MatRetrieveValues_SeqBAIJ(Mat mat) 2117 { 2118 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2119 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 2120 2121 PetscFunctionBegin; 2122 if (aij->nonew != 1) { 2123 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2124 } 2125 if (!aij->saved_values) { 2126 SETERRQ(1,"Must call MatStoreValues(A);first"); 2127 } 2128 2129 /* copy values over */ 2130 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 EXTERN_C_END 2134 2135 EXTERN_C_BEGIN 2136 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2137 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2138 EXTERN_C_END 2139 2140 EXTERN_C_BEGIN 2141 #undef __FUNCT__ 2142 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2143 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 2144 { 2145 Mat_SeqBAIJ *b; 2146 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 2147 PetscTruth flg; 2148 2149 PetscFunctionBegin; 2150 2151 B->preallocated = PETSC_TRUE; 2152 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2153 if (nnz && newbs != bs) { 2154 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 2155 } 2156 bs = newbs; 2157 2158 mbs = B->m/bs; 2159 nbs = B->n/bs; 2160 bs2 = bs*bs; 2161 2162 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2163 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2164 } 2165 2166 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2167 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2168 if (nnz) { 2169 for (i=0; i<mbs; i++) { 2170 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2171 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2172 } 2173 } 2174 2175 b = (Mat_SeqBAIJ*)B->data; 2176 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2177 B->ops->solve = MatSolve_SeqBAIJ_Update; 2178 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2179 if (!flg) { 2180 switch (bs) { 2181 case 1: 2182 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2183 B->ops->mult = MatMult_SeqBAIJ_1; 2184 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2185 break; 2186 case 2: 2187 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2188 B->ops->mult = MatMult_SeqBAIJ_2; 2189 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2190 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2191 break; 2192 case 3: 2193 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2194 B->ops->mult = MatMult_SeqBAIJ_3; 2195 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2196 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2197 break; 2198 case 4: 2199 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2200 B->ops->mult = MatMult_SeqBAIJ_4; 2201 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2202 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2203 break; 2204 case 5: 2205 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2206 B->ops->mult = MatMult_SeqBAIJ_5; 2207 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2208 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2209 break; 2210 case 6: 2211 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2212 B->ops->mult = MatMult_SeqBAIJ_6; 2213 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2214 break; 2215 case 7: 2216 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2217 B->ops->mult = MatMult_SeqBAIJ_7; 2218 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2219 break; 2220 default: 2221 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2222 B->ops->mult = MatMult_SeqBAIJ_N; 2223 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2224 break; 2225 } 2226 } 2227 b->bs = bs; 2228 b->mbs = mbs; 2229 b->nbs = nbs; 2230 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2231 if (!nnz) { 2232 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2233 else if (nz <= 0) nz = 1; 2234 for (i=0; i<mbs; i++) b->imax[i] = nz; 2235 nz = nz*mbs; 2236 } else { 2237 nz = 0; 2238 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2239 } 2240 2241 /* allocate the matrix space */ 2242 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2243 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2244 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2245 b->j = (int*)(b->a + nz*bs2); 2246 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2247 b->i = b->j + nz; 2248 b->singlemalloc = PETSC_TRUE; 2249 2250 b->i[0] = 0; 2251 for (i=1; i<mbs+1; i++) { 2252 b->i[i] = b->i[i-1] + b->imax[i-1]; 2253 } 2254 2255 /* b->ilen will count nonzeros in each block row so far. */ 2256 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2257 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2258 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2259 2260 b->bs = bs; 2261 b->bs2 = bs2; 2262 b->mbs = mbs; 2263 b->nz = 0; 2264 b->maxnz = nz*bs2; 2265 B->info.nz_unneeded = (PetscReal)b->maxnz; 2266 PetscFunctionReturn(0); 2267 } 2268 EXTERN_C_END 2269 2270 /*MC 2271 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2272 block sparse compressed row format. 2273 2274 Options Database Keys: 2275 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2276 2277 Level: beginner 2278 2279 .seealso: MatCreateSeqBAIJ 2280 M*/ 2281 2282 EXTERN_C_BEGIN 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatCreate_SeqBAIJ" 2285 int MatCreate_SeqBAIJ(Mat B) 2286 { 2287 int ierr,size; 2288 Mat_SeqBAIJ *b; 2289 2290 PetscFunctionBegin; 2291 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2292 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2293 2294 B->m = B->M = PetscMax(B->m,B->M); 2295 B->n = B->N = PetscMax(B->n,B->N); 2296 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2297 B->data = (void*)b; 2298 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2299 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2300 B->factor = 0; 2301 B->lupivotthreshold = 1.0; 2302 B->mapping = 0; 2303 b->row = 0; 2304 b->col = 0; 2305 b->icol = 0; 2306 b->reallocs = 0; 2307 b->saved_values = 0; 2308 #if defined(PETSC_USE_MAT_SINGLE) 2309 b->setvalueslen = 0; 2310 b->setvaluescopy = PETSC_NULL; 2311 #endif 2312 2313 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2314 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2315 2316 b->sorted = PETSC_FALSE; 2317 b->roworiented = PETSC_TRUE; 2318 b->nonew = 0; 2319 b->diag = 0; 2320 b->solve_work = 0; 2321 b->mult_work = 0; 2322 B->spptr = 0; 2323 B->info.nz_unneeded = (PetscReal)b->maxnz; 2324 b->keepzeroedrows = PETSC_FALSE; 2325 b->xtoy = 0; 2326 b->XtoY = 0; 2327 2328 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2329 "MatStoreValues_SeqBAIJ", 2330 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2331 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2332 "MatRetrieveValues_SeqBAIJ", 2333 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2334 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2335 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2336 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2337 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2338 "MatConvert_SeqBAIJ_SeqAIJ", 2339 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2340 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2341 "MatConvert_SeqBAIJ_SeqSBAIJ", 2342 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2343 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2344 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2345 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 EXTERN_C_END 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2352 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2353 { 2354 Mat C; 2355 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2356 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 2357 2358 PetscFunctionBegin; 2359 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2360 2361 *B = 0; 2362 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2363 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2364 c = (Mat_SeqBAIJ*)C->data; 2365 2366 C->M = A->M; 2367 C->N = A->N; 2368 c->bs = a->bs; 2369 c->bs2 = a->bs2; 2370 c->mbs = a->mbs; 2371 c->nbs = a->nbs; 2372 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2373 2374 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 2375 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 2376 for (i=0; i<mbs; i++) { 2377 c->imax[i] = a->imax[i]; 2378 c->ilen[i] = a->ilen[i]; 2379 } 2380 2381 /* allocate the matrix space */ 2382 c->singlemalloc = PETSC_TRUE; 2383 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 2384 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2385 c->j = (int*)(c->a + nz*bs2); 2386 c->i = c->j + nz; 2387 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 2388 if (mbs > 0) { 2389 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 2390 if (cpvalues == MAT_COPY_VALUES) { 2391 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2392 } else { 2393 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2394 } 2395 } 2396 2397 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2398 c->sorted = a->sorted; 2399 c->roworiented = a->roworiented; 2400 c->nonew = a->nonew; 2401 2402 if (a->diag) { 2403 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 2404 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 2405 for (i=0; i<mbs; i++) { 2406 c->diag[i] = a->diag[i]; 2407 } 2408 } else c->diag = 0; 2409 c->nz = a->nz; 2410 c->maxnz = a->maxnz; 2411 c->solve_work = 0; 2412 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2413 c->mult_work = 0; 2414 C->preallocated = PETSC_TRUE; 2415 C->assembled = PETSC_TRUE; 2416 *B = C; 2417 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatLoad_SeqBAIJ" 2423 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2424 { 2425 Mat_SeqBAIJ *a; 2426 Mat B; 2427 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 2428 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2429 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2430 int *masked,nmask,tmp,bs2,ishift; 2431 PetscScalar *aa; 2432 MPI_Comm comm = ((PetscObject)viewer)->comm; 2433 2434 PetscFunctionBegin; 2435 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2436 bs2 = bs*bs; 2437 2438 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2439 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2440 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2441 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2442 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2443 M = header[1]; N = header[2]; nz = header[3]; 2444 2445 if (header[3] < 0) { 2446 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2447 } 2448 2449 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2450 2451 /* 2452 This code adds extra rows to make sure the number of rows is 2453 divisible by the blocksize 2454 */ 2455 mbs = M/bs; 2456 extra_rows = bs - M + bs*(mbs); 2457 if (extra_rows == bs) extra_rows = 0; 2458 else mbs++; 2459 if (extra_rows) { 2460 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2461 } 2462 2463 /* read in row lengths */ 2464 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2465 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2466 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2467 2468 /* read in column indices */ 2469 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 2470 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2471 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2472 2473 /* loop over row lengths determining block row lengths */ 2474 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 2475 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 2476 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 2477 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 2478 masked = mask + mbs; 2479 rowcount = 0; nzcount = 0; 2480 for (i=0; i<mbs; i++) { 2481 nmask = 0; 2482 for (j=0; j<bs; j++) { 2483 kmax = rowlengths[rowcount]; 2484 for (k=0; k<kmax; k++) { 2485 tmp = jj[nzcount++]/bs; 2486 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2487 } 2488 rowcount++; 2489 } 2490 browlengths[i] += nmask; 2491 /* zero out the mask elements we set */ 2492 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2493 } 2494 2495 /* create our matrix */ 2496 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2497 ierr = MatSetType(B,type);CHKERRQ(ierr); 2498 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2499 a = (Mat_SeqBAIJ*)B->data; 2500 2501 /* set matrix "i" values */ 2502 a->i[0] = 0; 2503 for (i=1; i<= mbs; i++) { 2504 a->i[i] = a->i[i-1] + browlengths[i-1]; 2505 a->ilen[i-1] = browlengths[i-1]; 2506 } 2507 a->nz = 0; 2508 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2509 2510 /* read in nonzero values */ 2511 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2512 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2513 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2514 2515 /* set "a" and "j" values into matrix */ 2516 nzcount = 0; jcount = 0; 2517 for (i=0; i<mbs; i++) { 2518 nzcountb = nzcount; 2519 nmask = 0; 2520 for (j=0; j<bs; j++) { 2521 kmax = rowlengths[i*bs+j]; 2522 for (k=0; k<kmax; k++) { 2523 tmp = jj[nzcount++]/bs; 2524 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2525 } 2526 } 2527 /* sort the masked values */ 2528 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2529 2530 /* set "j" values into matrix */ 2531 maskcount = 1; 2532 for (j=0; j<nmask; j++) { 2533 a->j[jcount++] = masked[j]; 2534 mask[masked[j]] = maskcount++; 2535 } 2536 /* set "a" values into matrix */ 2537 ishift = bs2*a->i[i]; 2538 for (j=0; j<bs; j++) { 2539 kmax = rowlengths[i*bs+j]; 2540 for (k=0; k<kmax; k++) { 2541 tmp = jj[nzcountb]/bs ; 2542 block = mask[tmp] - 1; 2543 point = jj[nzcountb] - bs*tmp; 2544 idx = ishift + bs2*block + j + bs*point; 2545 a->a[idx] = (MatScalar)aa[nzcountb++]; 2546 } 2547 } 2548 /* zero out the mask elements we set */ 2549 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2550 } 2551 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2552 2553 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2554 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2555 ierr = PetscFree(aa);CHKERRQ(ierr); 2556 ierr = PetscFree(jj);CHKERRQ(ierr); 2557 ierr = PetscFree(mask);CHKERRQ(ierr); 2558 2559 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2561 ierr = MatView_Private(B);CHKERRQ(ierr); 2562 2563 *A = B; 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatCreateSeqBAIJ" 2569 /*@C 2570 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2571 compressed row) format. For good matrix assembly performance the 2572 user should preallocate the matrix storage by setting the parameter nz 2573 (or the array nnz). By setting these parameters accurately, performance 2574 during matrix assembly can be increased by more than a factor of 50. 2575 2576 Collective on MPI_Comm 2577 2578 Input Parameters: 2579 + comm - MPI communicator, set to PETSC_COMM_SELF 2580 . bs - size of block 2581 . m - number of rows 2582 . n - number of columns 2583 . nz - number of nonzero blocks per block row (same for all rows) 2584 - nnz - array containing the number of nonzero blocks in the various block rows 2585 (possibly different for each block row) or PETSC_NULL 2586 2587 Output Parameter: 2588 . A - the matrix 2589 2590 Options Database Keys: 2591 . -mat_no_unroll - uses code that does not unroll the loops in the 2592 block calculations (much slower) 2593 . -mat_block_size - size of the blocks to use 2594 2595 Level: intermediate 2596 2597 Notes: 2598 A nonzero block is any block that as 1 or more nonzeros in it 2599 2600 The block AIJ format is fully compatible with standard Fortran 77 2601 storage. That is, the stored row and column indices can begin at 2602 either one (as in Fortran) or zero. See the users' manual for details. 2603 2604 Specify the preallocated storage with either nz or nnz (not both). 2605 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2606 allocation. For additional details, see the users manual chapter on 2607 matrices. 2608 2609 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2610 @*/ 2611 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2612 { 2613 int ierr; 2614 2615 PetscFunctionBegin; 2616 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2617 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2618 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2619 PetscFunctionReturn(0); 2620 } 2621 2622 #undef __FUNCT__ 2623 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2624 /*@C 2625 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2626 per row in the matrix. For good matrix assembly performance the 2627 user should preallocate the matrix storage by setting the parameter nz 2628 (or the array nnz). By setting these parameters accurately, performance 2629 during matrix assembly can be increased by more than a factor of 50. 2630 2631 Collective on MPI_Comm 2632 2633 Input Parameters: 2634 + A - the matrix 2635 . bs - size of block 2636 . nz - number of block nonzeros per block row (same for all rows) 2637 - nnz - array containing the number of block nonzeros in the various block rows 2638 (possibly different for each block row) or PETSC_NULL 2639 2640 Options Database Keys: 2641 . -mat_no_unroll - uses code that does not unroll the loops in the 2642 block calculations (much slower) 2643 . -mat_block_size - size of the blocks to use 2644 2645 Level: intermediate 2646 2647 Notes: 2648 The block AIJ format is fully compatible with standard Fortran 77 2649 storage. That is, the stored row and column indices can begin at 2650 either one (as in Fortran) or zero. See the users' manual for details. 2651 2652 Specify the preallocated storage with either nz or nnz (not both). 2653 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2654 allocation. For additional details, see the users manual chapter on 2655 matrices. 2656 2657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2658 @*/ 2659 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2660 { 2661 int ierr,(*f)(Mat,int,int,const int[]); 2662 2663 PetscFunctionBegin; 2664 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2665 if (f) { 2666 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2667 } 2668 PetscFunctionReturn(0); 2669 } 2670 2671