1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 15 int MatInvertBlockDiagonal_SeqBAIJ(Mat A) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 int *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr; 19 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) PetscFunctionReturn(0); 23 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 24 diag_offset = a->diag; 25 if (!a->idiag) { 26 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 27 } 28 diag = a->idiag; 29 mdiag = a->idiag+bs*bs*mbs; 30 /* factor and invert each block */ 31 switch (a->bs){ 32 case 2: 33 for (i=0; i<mbs; i++) { 34 odiag = v + 4*diag_offset[i]; 35 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 36 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 37 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 38 diag += 4; 39 mdiag += 4; 40 } 41 break; 42 case 3: 43 for (i=0; i<mbs; i++) { 44 odiag = v + 9*diag_offset[i]; 45 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 46 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 47 diag[8] = odiag[8]; 48 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 49 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 50 mdiag[8] = odiag[8]; 51 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 52 diag += 9; 53 mdiag += 9; 54 } 55 break; 56 case 4: 57 for (i=0; i<mbs; i++) { 58 odiag = v + 16*diag_offset[i]; 59 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 61 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 62 diag += 16; 63 mdiag += 16; 64 } 65 break; 66 case 5: 67 for (i=0; i<mbs; i++) { 68 odiag = v + 25*diag_offset[i]; 69 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 71 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 72 diag += 25; 73 mdiag += 25; 74 } 75 break; 76 default: 77 SETERRQ1(1,"not supported for block size %d",a->bs); 78 } 79 a->idiagvalid = PETSC_TRUE; 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 85 int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 88 PetscScalar *x,x1,x2,s1,s2; 89 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 90 int ierr,m = a->mbs,i,i2,nz,idx; 91 const int *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 int ierr,m = a->mbs,i,i2,nz,idx; 192 const int *diag,*ai = a->i,*aj = a->j,*vi; 193 194 PetscFunctionBegin; 195 its = its*lits; 196 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 197 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 198 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 199 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 200 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 201 202 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 203 204 diag = a->diag; 205 idiag = a->idiag; 206 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 207 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 208 209 if (flag & SOR_ZERO_INITIAL_GUESS) { 210 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 211 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 212 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 213 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 214 i2 = 3; 215 idiag += 9; 216 for (i=1; i<m; i++) { 217 v = aa + 9*ai[i]; 218 vi = aj + ai[i]; 219 nz = diag[i] - ai[i]; 220 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 221 while (nz--) { 222 idx = 3*(*vi++); 223 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 224 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 225 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 226 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 227 v += 9; 228 } 229 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 230 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 231 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 232 idiag += 9; 233 i2 += 3; 234 } 235 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 236 PetscLogFlops(9*(a->nz)); 237 } 238 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 239 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 240 i2 = 0; 241 mdiag = a->idiag+9*a->mbs; 242 for (i=0; i<m; i++) { 243 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 244 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 245 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 246 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 247 mdiag += 9; 248 i2 += 3; 249 } 250 PetscLogFlops(15*m); 251 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 252 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 255 idiag = a->idiag+9*a->mbs - 9; 256 i2 = 3*m - 3; 257 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 258 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 259 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 260 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 261 idiag -= 9; 262 i2 -= 3; 263 for (i=m-2; i>=0; i--) { 264 v = aa + 9*(diag[i]+1); 265 vi = aj + diag[i] + 1; 266 nz = ai[i+1] - diag[i] - 1; 267 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 268 while (nz--) { 269 idx = 3*(*vi++); 270 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 271 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 272 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 273 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 274 v += 9; 275 } 276 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 277 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 278 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 279 idiag -= 9; 280 i2 -= 3; 281 } 282 PetscLogFlops(9*(a->nz)); 283 } 284 } else { 285 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 286 } 287 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 288 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 294 int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 295 { 296 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 297 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 298 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 299 int ierr,m = a->mbs,i,i2,nz,idx; 300 const int *diag,*ai = a->i,*aj = a->j,*vi; 301 302 PetscFunctionBegin; 303 its = its*lits; 304 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 305 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 306 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 307 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 308 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 309 310 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 311 312 diag = a->diag; 313 idiag = a->idiag; 314 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 315 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 316 317 if (flag & SOR_ZERO_INITIAL_GUESS) { 318 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 319 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 320 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 321 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 322 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 323 i2 = 4; 324 idiag += 16; 325 for (i=1; i<m; i++) { 326 v = aa + 16*ai[i]; 327 vi = aj + ai[i]; 328 nz = diag[i] - ai[i]; 329 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 330 while (nz--) { 331 idx = 4*(*vi++); 332 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 333 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 334 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 335 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 336 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 337 v += 16; 338 } 339 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 340 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 341 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 342 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 343 idiag += 16; 344 i2 += 4; 345 } 346 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 347 PetscLogFlops(16*(a->nz)); 348 } 349 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 350 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 351 i2 = 0; 352 mdiag = a->idiag+16*a->mbs; 353 for (i=0; i<m; i++) { 354 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 355 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 356 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 357 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 358 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 359 mdiag += 16; 360 i2 += 4; 361 } 362 PetscLogFlops(28*m); 363 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 364 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 365 } 366 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 367 idiag = a->idiag+16*a->mbs - 16; 368 i2 = 4*m - 4; 369 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 370 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 371 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 372 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 373 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 374 idiag -= 16; 375 i2 -= 4; 376 for (i=m-2; i>=0; i--) { 377 v = aa + 16*(diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 381 while (nz--) { 382 idx = 4*(*vi++); 383 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 384 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 385 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 386 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 387 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 388 v += 16; 389 } 390 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 391 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 392 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 393 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 394 idiag -= 16; 395 i2 -= 4; 396 } 397 PetscLogFlops(16*(a->nz)); 398 } 399 } else { 400 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 401 } 402 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 403 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 409 int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 410 { 411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 412 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 413 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 414 int ierr,m = a->mbs,i,i2,nz,idx; 415 const int *diag,*ai = a->i,*aj = a->j,*vi; 416 417 PetscFunctionBegin; 418 its = its*lits; 419 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 420 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 421 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 422 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 423 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 424 425 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 426 427 diag = a->diag; 428 idiag = a->idiag; 429 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 430 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 431 432 if (flag & SOR_ZERO_INITIAL_GUESS) { 433 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 434 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 435 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 436 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 437 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 438 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 439 i2 = 5; 440 idiag += 25; 441 for (i=1; i<m; i++) { 442 v = aa + 25*ai[i]; 443 vi = aj + ai[i]; 444 nz = diag[i] - ai[i]; 445 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 446 while (nz--) { 447 idx = 5*(*vi++); 448 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 449 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 450 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 451 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 452 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 453 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 454 v += 25; 455 } 456 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 457 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 458 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 459 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 460 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 461 idiag += 25; 462 i2 += 5; 463 } 464 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 465 PetscLogFlops(25*(a->nz)); 466 } 467 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 468 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 469 i2 = 0; 470 mdiag = a->idiag+25*a->mbs; 471 for (i=0; i<m; i++) { 472 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 473 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 474 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 475 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 476 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 477 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 478 mdiag += 25; 479 i2 += 5; 480 } 481 PetscLogFlops(45*m); 482 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 483 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 484 } 485 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 486 idiag = a->idiag+25*a->mbs - 25; 487 i2 = 5*m - 5; 488 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 489 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 490 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 491 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 492 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 493 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 494 idiag -= 25; 495 i2 -= 5; 496 for (i=m-2; i>=0; i--) { 497 v = aa + 25*(diag[i]+1); 498 vi = aj + diag[i] + 1; 499 nz = ai[i+1] - diag[i] - 1; 500 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 501 while (nz--) { 502 idx = 5*(*vi++); 503 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 504 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 505 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 506 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 507 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 508 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 509 v += 25; 510 } 511 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 512 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 513 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 514 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 515 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 516 idiag -= 25; 517 i2 -= 5; 518 } 519 PetscLogFlops(25*(a->nz)); 520 } 521 } else { 522 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 523 } 524 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 525 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 /* 530 Special version for Fun3d sequential benchmark 531 */ 532 #if defined(PETSC_HAVE_FORTRAN_CAPS) 533 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 534 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 535 #define matsetvaluesblocked4_ matsetvaluesblocked4 536 #endif 537 538 EXTERN_C_BEGIN 539 #undef __FUNCT__ 540 #define __FUNCT__ "matsetvaluesblocked4_" 541 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 542 { 543 Mat A = *AA; 544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 545 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 546 int *ai=a->i,*ailen=a->ilen; 547 int *aj=a->j,stepval; 548 const PetscScalar *value = v; 549 MatScalar *ap,*aa = a->a,*bap; 550 551 PetscFunctionBegin; 552 stepval = (n-1)*4; 553 for (k=0; k<m; k++) { /* loop over added rows */ 554 row = im[k]; 555 rp = aj + ai[row]; 556 ap = aa + 16*ai[row]; 557 nrow = ailen[row]; 558 low = 0; 559 for (l=0; l<n; l++) { /* loop over added columns */ 560 col = in[l]; 561 value = v + k*(stepval+4)*4 + l*4; 562 low = 0; high = nrow; 563 while (high-low > 7) { 564 t = (low+high)/2; 565 if (rp[t] > col) high = t; 566 else low = t; 567 } 568 for (i=low; i<high; i++) { 569 if (rp[i] > col) break; 570 if (rp[i] == col) { 571 bap = ap + 16*i; 572 for (ii=0; ii<4; ii++,value+=stepval) { 573 for (jj=ii; jj<16; jj+=4) { 574 bap[jj] += *value++; 575 } 576 } 577 goto noinsert2; 578 } 579 } 580 N = nrow++ - 1; 581 /* shift up all the later entries in this row */ 582 for (ii=N; ii>=i; ii--) { 583 rp[ii+1] = rp[ii]; 584 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 585 } 586 if (N >= i) { 587 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 588 } 589 rp[i] = col; 590 bap = ap + 16*i; 591 for (ii=0; ii<4; ii++,value+=stepval) { 592 for (jj=ii; jj<16; jj+=4) { 593 bap[jj] = *value++; 594 } 595 } 596 noinsert2:; 597 low = i; 598 } 599 ailen[row] = nrow; 600 } 601 } 602 EXTERN_C_END 603 604 #if defined(PETSC_HAVE_FORTRAN_CAPS) 605 #define matsetvalues4_ MATSETVALUES4 606 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 607 #define matsetvalues4_ matsetvalues4 608 #endif 609 610 EXTERN_C_BEGIN 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatSetValues4_" 613 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 614 { 615 Mat A = *AA; 616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 617 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 618 int *ai=a->i,*ailen=a->ilen; 619 int *aj=a->j,brow,bcol; 620 int ridx,cidx; 621 MatScalar *ap,value,*aa=a->a,*bap; 622 623 PetscFunctionBegin; 624 for (k=0; k<m; k++) { /* loop over added rows */ 625 row = im[k]; brow = row/4; 626 rp = aj + ai[brow]; 627 ap = aa + 16*ai[brow]; 628 nrow = ailen[brow]; 629 low = 0; 630 for (l=0; l<n; l++) { /* loop over added columns */ 631 col = in[l]; bcol = col/4; 632 ridx = row % 4; cidx = col % 4; 633 value = v[l + k*n]; 634 low = 0; high = nrow; 635 while (high-low > 7) { 636 t = (low+high)/2; 637 if (rp[t] > bcol) high = t; 638 else low = t; 639 } 640 for (i=low; i<high; i++) { 641 if (rp[i] > bcol) break; 642 if (rp[i] == bcol) { 643 bap = ap + 16*i + 4*cidx + ridx; 644 *bap += value; 645 goto noinsert1; 646 } 647 } 648 N = nrow++ - 1; 649 /* shift up all the later entries in this row */ 650 for (ii=N; ii>=i; ii--) { 651 rp[ii+1] = rp[ii]; 652 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 653 } 654 if (N>=i) { 655 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 656 } 657 rp[i] = bcol; 658 ap[16*i + 4*cidx + ridx] = value; 659 noinsert1:; 660 low = i; 661 } 662 ailen[brow] = nrow; 663 } 664 } 665 EXTERN_C_END 666 667 /* UGLY, ugly, ugly 668 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 669 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 670 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 671 converts the entries into single precision and then calls ..._MatScalar() to put them 672 into the single precision data structures. 673 */ 674 #if defined(PETSC_USE_MAT_SINGLE) 675 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 676 #else 677 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 678 #endif 679 680 #define CHUNKSIZE 10 681 682 /* 683 Checks for missing diagonals 684 */ 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 687 int MatMissingDiagonal_SeqBAIJ(Mat A) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 int *diag,*jj = a->j,i,ierr; 691 692 PetscFunctionBegin; 693 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 694 diag = a->diag; 695 for (i=0; i<a->mbs; i++) { 696 if (jj[diag[i]] != i) { 697 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 698 } 699 } 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 705 int MatMarkDiagonal_SeqBAIJ(Mat A) 706 { 707 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 708 int i,j,*diag,m = a->mbs,ierr; 709 710 PetscFunctionBegin; 711 if (a->diag) PetscFunctionReturn(0); 712 713 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 714 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 715 for (i=0; i<m; i++) { 716 diag[i] = a->i[i+1]; 717 for (j=a->i[i]; j<a->i[i+1]; j++) { 718 if (a->j[j] == i) { 719 diag[i] = j; 720 break; 721 } 722 } 723 } 724 a->diag = diag; 725 PetscFunctionReturn(0); 726 } 727 728 729 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 733 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 int ierr,n = a->mbs,i; 737 738 PetscFunctionBegin; 739 *nn = n; 740 if (!ia) PetscFunctionReturn(0); 741 if (symmetric) { 742 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 743 } else if (oshift == 1) { 744 /* temporarily add 1 to i and j indices */ 745 int nz = a->i[n]; 746 for (i=0; i<nz; i++) a->j[i]++; 747 for (i=0; i<n+1; i++) a->i[i]++; 748 *ia = a->i; *ja = a->j; 749 } else { 750 *ia = a->i; *ja = a->j; 751 } 752 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 758 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 759 { 760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 761 int i,n = a->mbs,ierr; 762 763 PetscFunctionBegin; 764 if (!ia) PetscFunctionReturn(0); 765 if (symmetric) { 766 ierr = PetscFree(*ia);CHKERRQ(ierr); 767 ierr = PetscFree(*ja);CHKERRQ(ierr); 768 } else if (oshift == 1) { 769 int nz = a->i[n]-1; 770 for (i=0; i<nz; i++) a->j[i]--; 771 for (i=0; i<n+1; i++) a->i[i]--; 772 } 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 778 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 779 { 780 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 781 782 PetscFunctionBegin; 783 *bs = baij->bs; 784 PetscFunctionReturn(0); 785 } 786 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatDestroy_SeqBAIJ" 790 int MatDestroy_SeqBAIJ(Mat A) 791 { 792 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 793 int ierr; 794 795 PetscFunctionBegin; 796 #if defined(PETSC_USE_LOG) 797 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 798 #endif 799 ierr = PetscFree(a->a);CHKERRQ(ierr); 800 if (!a->singlemalloc) { 801 ierr = PetscFree(a->i);CHKERRQ(ierr); 802 ierr = PetscFree(a->j);CHKERRQ(ierr); 803 } 804 if (a->row) { 805 ierr = ISDestroy(a->row);CHKERRQ(ierr); 806 } 807 if (a->col) { 808 ierr = ISDestroy(a->col);CHKERRQ(ierr); 809 } 810 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 811 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 812 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 813 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 814 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 815 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 816 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 817 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 818 #if defined(PETSC_USE_MAT_SINGLE) 819 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 820 #endif 821 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 822 823 ierr = PetscFree(a);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatSetOption_SeqBAIJ" 829 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 830 { 831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 832 833 PetscFunctionBegin; 834 switch (op) { 835 case MAT_ROW_ORIENTED: 836 a->roworiented = PETSC_TRUE; 837 break; 838 case MAT_COLUMN_ORIENTED: 839 a->roworiented = PETSC_FALSE; 840 break; 841 case MAT_COLUMNS_SORTED: 842 a->sorted = PETSC_TRUE; 843 break; 844 case MAT_COLUMNS_UNSORTED: 845 a->sorted = PETSC_FALSE; 846 break; 847 case MAT_KEEP_ZEROED_ROWS: 848 a->keepzeroedrows = PETSC_TRUE; 849 break; 850 case MAT_NO_NEW_NONZERO_LOCATIONS: 851 a->nonew = 1; 852 break; 853 case MAT_NEW_NONZERO_LOCATION_ERR: 854 a->nonew = -1; 855 break; 856 case MAT_NEW_NONZERO_ALLOCATION_ERR: 857 a->nonew = -2; 858 break; 859 case MAT_YES_NEW_NONZERO_LOCATIONS: 860 a->nonew = 0; 861 break; 862 case MAT_ROWS_SORTED: 863 case MAT_ROWS_UNSORTED: 864 case MAT_YES_NEW_DIAGONALS: 865 case MAT_IGNORE_OFF_PROC_ENTRIES: 866 case MAT_USE_HASH_TABLE: 867 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 868 break; 869 case MAT_NO_NEW_DIAGONALS: 870 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 871 case MAT_SYMMETRIC: 872 case MAT_STRUCTURALLY_SYMMETRIC: 873 case MAT_NOT_SYMMETRIC: 874 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 875 case MAT_HERMITIAN: 876 case MAT_NOT_HERMITIAN: 877 case MAT_SYMMETRY_ETERNAL: 878 case MAT_NOT_SYMMETRY_ETERNAL: 879 break; 880 default: 881 SETERRQ(PETSC_ERR_SUP,"unknown option"); 882 } 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatGetRow_SeqBAIJ" 888 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 889 { 890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 891 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 892 MatScalar *aa,*aa_i; 893 PetscScalar *v_i; 894 895 PetscFunctionBegin; 896 bs = a->bs; 897 ai = a->i; 898 aj = a->j; 899 aa = a->a; 900 bs2 = a->bs2; 901 902 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 903 904 bn = row/bs; /* Block number */ 905 bp = row % bs; /* Block Position */ 906 M = ai[bn+1] - ai[bn]; 907 *nz = bs*M; 908 909 if (v) { 910 *v = 0; 911 if (*nz) { 912 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 913 for (i=0; i<M; i++) { /* for each block in the block row */ 914 v_i = *v + i*bs; 915 aa_i = aa + bs2*(ai[bn] + i); 916 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 917 } 918 } 919 } 920 921 if (idx) { 922 *idx = 0; 923 if (*nz) { 924 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 925 for (i=0; i<M; i++) { /* for each block in the block row */ 926 idx_i = *idx + i*bs; 927 itmp = bs*aj[ai[bn] + i]; 928 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 929 } 930 } 931 } 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 937 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 938 { 939 int ierr; 940 941 PetscFunctionBegin; 942 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 943 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatTranspose_SeqBAIJ" 949 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 950 { 951 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 952 Mat C; 953 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 954 int *rows,*cols,bs2=a->bs2; 955 PetscScalar *array; 956 957 PetscFunctionBegin; 958 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 959 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 960 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 961 962 #if defined(PETSC_USE_MAT_SINGLE) 963 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 964 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 965 #else 966 array = a->a; 967 #endif 968 969 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 970 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 971 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 972 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 973 ierr = PetscFree(col);CHKERRQ(ierr); 974 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 975 cols = rows + bs; 976 for (i=0; i<mbs; i++) { 977 cols[0] = i*bs; 978 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 979 len = ai[i+1] - ai[i]; 980 for (j=0; j<len; j++) { 981 rows[0] = (*aj++)*bs; 982 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 983 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 984 array += bs2; 985 } 986 } 987 ierr = PetscFree(rows);CHKERRQ(ierr); 988 #if defined(PETSC_USE_MAT_SINGLE) 989 ierr = PetscFree(array); 990 #endif 991 992 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 993 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 994 995 if (B) { 996 *B = C; 997 } else { 998 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1005 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1006 { 1007 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1008 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1009 PetscScalar *aa; 1010 FILE *file; 1011 1012 PetscFunctionBegin; 1013 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1014 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 1015 col_lens[0] = MAT_FILE_COOKIE; 1016 1017 col_lens[1] = A->m; 1018 col_lens[2] = A->n; 1019 col_lens[3] = a->nz*bs2; 1020 1021 /* store lengths of each row and write (including header) to file */ 1022 count = 0; 1023 for (i=0; i<a->mbs; i++) { 1024 for (j=0; j<bs; j++) { 1025 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1026 } 1027 } 1028 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 1029 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1030 1031 /* store column indices (zero start index) */ 1032 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 1033 count = 0; 1034 for (i=0; i<a->mbs; i++) { 1035 for (j=0; j<bs; j++) { 1036 for (k=a->i[i]; k<a->i[i+1]; k++) { 1037 for (l=0; l<bs; l++) { 1038 jj[count++] = bs*a->j[k] + l; 1039 } 1040 } 1041 } 1042 } 1043 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 1044 ierr = PetscFree(jj);CHKERRQ(ierr); 1045 1046 /* store nonzero values */ 1047 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1048 count = 0; 1049 for (i=0; i<a->mbs; i++) { 1050 for (j=0; j<bs; j++) { 1051 for (k=a->i[i]; k<a->i[i+1]; k++) { 1052 for (l=0; l<bs; l++) { 1053 aa[count++] = a->a[bs2*k + l*bs + j]; 1054 } 1055 } 1056 } 1057 } 1058 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 1059 ierr = PetscFree(aa);CHKERRQ(ierr); 1060 1061 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1062 if (file) { 1063 fprintf(file,"-matload_block_size %d\n",a->bs); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1070 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1071 { 1072 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1073 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 1074 PetscViewerFormat format; 1075 1076 PetscFunctionBegin; 1077 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1078 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1079 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1080 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1081 Mat aij; 1082 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1083 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1084 ierr = MatDestroy(aij);CHKERRQ(ierr); 1085 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1086 PetscFunctionReturn(0); 1087 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1088 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1089 for (i=0; i<a->mbs; i++) { 1090 for (j=0; j<bs; j++) { 1091 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1092 for (k=a->i[i]; k<a->i[i+1]; k++) { 1093 for (l=0; l<bs; l++) { 1094 #if defined(PETSC_USE_COMPLEX) 1095 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1096 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 1097 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1098 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1099 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 1100 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1101 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1102 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1103 } 1104 #else 1105 if (a->a[bs2*k + l*bs + j] != 0.0) { 1106 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1107 } 1108 #endif 1109 } 1110 } 1111 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1112 } 1113 } 1114 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1115 } else { 1116 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1117 for (i=0; i<a->mbs; i++) { 1118 for (j=0; j<bs; j++) { 1119 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1120 for (k=a->i[i]; k<a->i[i+1]; k++) { 1121 for (l=0; l<bs; l++) { 1122 #if defined(PETSC_USE_COMPLEX) 1123 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1124 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 1125 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1126 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1127 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 1128 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1129 } else { 1130 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1131 } 1132 #else 1133 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1134 #endif 1135 } 1136 } 1137 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1138 } 1139 } 1140 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1141 } 1142 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1148 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1149 { 1150 Mat A = (Mat) Aa; 1151 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1152 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1153 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1154 MatScalar *aa; 1155 PetscViewer viewer; 1156 1157 PetscFunctionBegin; 1158 1159 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1160 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1161 1162 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1163 1164 /* loop over matrix elements drawing boxes */ 1165 color = PETSC_DRAW_BLUE; 1166 for (i=0,row=0; i<mbs; i++,row+=bs) { 1167 for (j=a->i[i]; j<a->i[i+1]; j++) { 1168 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1169 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1170 aa = a->a + j*bs2; 1171 for (k=0; k<bs; k++) { 1172 for (l=0; l<bs; l++) { 1173 if (PetscRealPart(*aa++) >= 0.) continue; 1174 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1175 } 1176 } 1177 } 1178 } 1179 color = PETSC_DRAW_CYAN; 1180 for (i=0,row=0; i<mbs; i++,row+=bs) { 1181 for (j=a->i[i]; j<a->i[i+1]; j++) { 1182 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1183 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1184 aa = a->a + j*bs2; 1185 for (k=0; k<bs; k++) { 1186 for (l=0; l<bs; l++) { 1187 if (PetscRealPart(*aa++) != 0.) continue; 1188 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1189 } 1190 } 1191 } 1192 } 1193 1194 color = PETSC_DRAW_RED; 1195 for (i=0,row=0; i<mbs; i++,row+=bs) { 1196 for (j=a->i[i]; j<a->i[i+1]; j++) { 1197 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1198 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1199 aa = a->a + j*bs2; 1200 for (k=0; k<bs; k++) { 1201 for (l=0; l<bs; l++) { 1202 if (PetscRealPart(*aa++) <= 0.) continue; 1203 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1204 } 1205 } 1206 } 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1213 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1214 { 1215 int ierr; 1216 PetscReal xl,yl,xr,yr,w,h; 1217 PetscDraw draw; 1218 PetscTruth isnull; 1219 1220 PetscFunctionBegin; 1221 1222 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1223 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1224 1225 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1226 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1227 xr += w; yr += h; xl = -w; yl = -h; 1228 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1229 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1230 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatView_SeqBAIJ" 1236 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1237 { 1238 int ierr; 1239 PetscTruth isascii,isbinary,isdraw; 1240 1241 PetscFunctionBegin; 1242 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1243 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1244 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1245 if (isascii){ 1246 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1247 } else if (isbinary) { 1248 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1249 } else if (isdraw) { 1250 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1251 } else { 1252 Mat B; 1253 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1254 ierr = MatView(B,viewer);CHKERRQ(ierr); 1255 ierr = MatDestroy(B);CHKERRQ(ierr); 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1263 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 1264 { 1265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1266 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1267 int *ai = a->i,*ailen = a->ilen; 1268 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1269 MatScalar *ap,*aa = a->a,zero = 0.0; 1270 1271 PetscFunctionBegin; 1272 for (k=0; k<m; k++) { /* loop over rows */ 1273 row = im[k]; brow = row/bs; 1274 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1275 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 1276 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1277 nrow = ailen[brow]; 1278 for (l=0; l<n; l++) { /* loop over columns */ 1279 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1280 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 1281 col = in[l] ; 1282 bcol = col/bs; 1283 cidx = col%bs; 1284 ridx = row%bs; 1285 high = nrow; 1286 low = 0; /* assume unsorted */ 1287 while (high-low > 5) { 1288 t = (low+high)/2; 1289 if (rp[t] > bcol) high = t; 1290 else low = t; 1291 } 1292 for (i=low; i<high; i++) { 1293 if (rp[i] > bcol) break; 1294 if (rp[i] == bcol) { 1295 *v++ = ap[bs2*i+bs*cidx+ridx]; 1296 goto finished; 1297 } 1298 } 1299 *v++ = zero; 1300 finished:; 1301 } 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #if defined(PETSC_USE_MAT_SINGLE) 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1309 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 1310 { 1311 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1312 int ierr,i,N = m*n*b->bs2; 1313 MatScalar *vsingle; 1314 1315 PetscFunctionBegin; 1316 if (N > b->setvalueslen) { 1317 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1318 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1319 b->setvalueslen = N; 1320 } 1321 vsingle = b->setvaluescopy; 1322 for (i=0; i<N; i++) { 1323 vsingle[i] = v[i]; 1324 } 1325 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1326 PetscFunctionReturn(0); 1327 } 1328 #endif 1329 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1333 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 1334 { 1335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1336 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1337 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1338 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 1339 PetscTruth roworiented=a->roworiented; 1340 const MatScalar *value = v; 1341 MatScalar *ap,*aa = a->a,*bap; 1342 1343 PetscFunctionBegin; 1344 if (roworiented) { 1345 stepval = (n-1)*bs; 1346 } else { 1347 stepval = (m-1)*bs; 1348 } 1349 for (k=0; k<m; k++) { /* loop over added rows */ 1350 row = im[k]; 1351 if (row < 0) continue; 1352 #if defined(PETSC_USE_BOPT_g) 1353 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 1354 #endif 1355 rp = aj + ai[row]; 1356 ap = aa + bs2*ai[row]; 1357 rmax = imax[row]; 1358 nrow = ailen[row]; 1359 low = 0; 1360 for (l=0; l<n; l++) { /* loop over added columns */ 1361 if (in[l] < 0) continue; 1362 #if defined(PETSC_USE_BOPT_g) 1363 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 1364 #endif 1365 col = in[l]; 1366 if (roworiented) { 1367 value = v + k*(stepval+bs)*bs + l*bs; 1368 } else { 1369 value = v + l*(stepval+bs)*bs + k*bs; 1370 } 1371 if (!sorted) low = 0; high = nrow; 1372 while (high-low > 7) { 1373 t = (low+high)/2; 1374 if (rp[t] > col) high = t; 1375 else low = t; 1376 } 1377 for (i=low; i<high; i++) { 1378 if (rp[i] > col) break; 1379 if (rp[i] == col) { 1380 bap = ap + bs2*i; 1381 if (roworiented) { 1382 if (is == ADD_VALUES) { 1383 for (ii=0; ii<bs; ii++,value+=stepval) { 1384 for (jj=ii; jj<bs2; jj+=bs) { 1385 bap[jj] += *value++; 1386 } 1387 } 1388 } else { 1389 for (ii=0; ii<bs; ii++,value+=stepval) { 1390 for (jj=ii; jj<bs2; jj+=bs) { 1391 bap[jj] = *value++; 1392 } 1393 } 1394 } 1395 } else { 1396 if (is == ADD_VALUES) { 1397 for (ii=0; ii<bs; ii++,value+=stepval) { 1398 for (jj=0; jj<bs; jj++) { 1399 *bap++ += *value++; 1400 } 1401 } 1402 } else { 1403 for (ii=0; ii<bs; ii++,value+=stepval) { 1404 for (jj=0; jj<bs; jj++) { 1405 *bap++ = *value++; 1406 } 1407 } 1408 } 1409 } 1410 goto noinsert2; 1411 } 1412 } 1413 if (nonew == 1) goto noinsert2; 1414 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1415 if (nrow >= rmax) { 1416 /* there is no extra room in row, therefore enlarge */ 1417 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1418 MatScalar *new_a; 1419 1420 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1421 1422 /* malloc new storage space */ 1423 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1424 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1425 new_j = (int*)(new_a + bs2*new_nz); 1426 new_i = new_j + new_nz; 1427 1428 /* copy over old data into new slots */ 1429 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1430 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1431 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 1432 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1433 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 1434 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1435 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1436 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1437 /* free up old matrix storage */ 1438 ierr = PetscFree(a->a);CHKERRQ(ierr); 1439 if (!a->singlemalloc) { 1440 ierr = PetscFree(a->i);CHKERRQ(ierr); 1441 ierr = PetscFree(a->j);CHKERRQ(ierr); 1442 } 1443 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1444 a->singlemalloc = PETSC_TRUE; 1445 1446 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1447 rmax = imax[row] = imax[row] + CHUNKSIZE; 1448 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1449 a->maxnz += bs2*CHUNKSIZE; 1450 a->reallocs++; 1451 a->nz++; 1452 } 1453 N = nrow++ - 1; 1454 /* shift up all the later entries in this row */ 1455 for (ii=N; ii>=i; ii--) { 1456 rp[ii+1] = rp[ii]; 1457 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1458 } 1459 if (N >= i) { 1460 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1461 } 1462 rp[i] = col; 1463 bap = ap + bs2*i; 1464 if (roworiented) { 1465 for (ii=0; ii<bs; ii++,value+=stepval) { 1466 for (jj=ii; jj<bs2; jj+=bs) { 1467 bap[jj] = *value++; 1468 } 1469 } 1470 } else { 1471 for (ii=0; ii<bs; ii++,value+=stepval) { 1472 for (jj=0; jj<bs; jj++) { 1473 *bap++ = *value++; 1474 } 1475 } 1476 } 1477 noinsert2:; 1478 low = i; 1479 } 1480 ailen[row] = nrow; 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1487 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1488 { 1489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1490 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1491 int m = A->m,*ip,N,*ailen = a->ilen; 1492 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 1493 MatScalar *aa = a->a,*ap; 1494 1495 PetscFunctionBegin; 1496 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1497 1498 if (m) rmax = ailen[0]; 1499 for (i=1; i<mbs; i++) { 1500 /* move each row back by the amount of empty slots (fshift) before it*/ 1501 fshift += imax[i-1] - ailen[i-1]; 1502 rmax = PetscMax(rmax,ailen[i]); 1503 if (fshift) { 1504 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1505 N = ailen[i]; 1506 for (j=0; j<N; j++) { 1507 ip[j-fshift] = ip[j]; 1508 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1509 } 1510 } 1511 ai[i] = ai[i-1] + ailen[i-1]; 1512 } 1513 if (mbs) { 1514 fshift += imax[mbs-1] - ailen[mbs-1]; 1515 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1516 } 1517 /* reset ilen and imax for each row */ 1518 for (i=0; i<mbs; i++) { 1519 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1520 } 1521 a->nz = ai[mbs]; 1522 1523 /* diagonals may have moved, so kill the diagonal pointers */ 1524 a->idiagvalid = PETSC_FALSE; 1525 if (fshift && a->diag) { 1526 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1527 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1528 a->diag = 0; 1529 } 1530 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1531 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1532 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1533 a->reallocs = 0; 1534 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1535 1536 PetscFunctionReturn(0); 1537 } 1538 1539 1540 1541 /* 1542 This function returns an array of flags which indicate the locations of contiguous 1543 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1544 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1545 Assume: sizes should be long enough to hold all the values. 1546 */ 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1549 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1550 { 1551 int i,j,k,row; 1552 PetscTruth flg; 1553 1554 PetscFunctionBegin; 1555 for (i=0,j=0; i<n; j++) { 1556 row = idx[i]; 1557 if (row%bs!=0) { /* Not the begining of a block */ 1558 sizes[j] = 1; 1559 i++; 1560 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1561 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1562 i++; 1563 } else { /* Begining of the block, so check if the complete block exists */ 1564 flg = PETSC_TRUE; 1565 for (k=1; k<bs; k++) { 1566 if (row+k != idx[i+k]) { /* break in the block */ 1567 flg = PETSC_FALSE; 1568 break; 1569 } 1570 } 1571 if (flg == PETSC_TRUE) { /* No break in the bs */ 1572 sizes[j] = bs; 1573 i+= bs; 1574 } else { 1575 sizes[j] = 1; 1576 i++; 1577 } 1578 } 1579 } 1580 *bs_max = j; 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1586 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1587 { 1588 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1589 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1590 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1591 PetscScalar zero = 0.0; 1592 MatScalar *aa; 1593 1594 PetscFunctionBegin; 1595 /* Make a copy of the IS and sort it */ 1596 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1597 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1598 1599 /* allocate memory for rows,sizes */ 1600 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1601 sizes = rows + is_n; 1602 1603 /* copy IS values to rows, and sort them */ 1604 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1605 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1606 if (baij->keepzeroedrows) { 1607 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1608 bs_max = is_n; 1609 } else { 1610 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1611 } 1612 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1613 1614 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1615 row = rows[j]; 1616 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1617 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1618 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1619 if (sizes[i] == bs && !baij->keepzeroedrows) { 1620 if (diag) { 1621 if (baij->ilen[row/bs] > 0) { 1622 baij->ilen[row/bs] = 1; 1623 baij->j[baij->i[row/bs]] = row/bs; 1624 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1625 } 1626 /* Now insert all the diagonal values for this bs */ 1627 for (k=0; k<bs; k++) { 1628 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1629 } 1630 } else { /* (!diag) */ 1631 baij->ilen[row/bs] = 0; 1632 } /* end (!diag) */ 1633 } else { /* (sizes[i] != bs) */ 1634 #if defined (PETSC_USE_DEBUG) 1635 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1636 #endif 1637 for (k=0; k<count; k++) { 1638 aa[0] = zero; 1639 aa += bs; 1640 } 1641 if (diag) { 1642 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1643 } 1644 } 1645 } 1646 1647 ierr = PetscFree(rows);CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1654 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1655 { 1656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1657 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1658 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1659 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1660 int ridx,cidx,bs2=a->bs2,ierr; 1661 PetscTruth roworiented=a->roworiented; 1662 MatScalar *ap,value,*aa=a->a,*bap; 1663 1664 PetscFunctionBegin; 1665 for (k=0; k<m; k++) { /* loop over added rows */ 1666 row = im[k]; brow = row/bs; 1667 if (row < 0) continue; 1668 #if defined(PETSC_USE_BOPT_g) 1669 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1670 #endif 1671 rp = aj + ai[brow]; 1672 ap = aa + bs2*ai[brow]; 1673 rmax = imax[brow]; 1674 nrow = ailen[brow]; 1675 low = 0; 1676 for (l=0; l<n; l++) { /* loop over added columns */ 1677 if (in[l] < 0) continue; 1678 #if defined(PETSC_USE_BOPT_g) 1679 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1680 #endif 1681 col = in[l]; bcol = col/bs; 1682 ridx = row % bs; cidx = col % bs; 1683 if (roworiented) { 1684 value = v[l + k*n]; 1685 } else { 1686 value = v[k + l*m]; 1687 } 1688 if (!sorted) low = 0; high = nrow; 1689 while (high-low > 7) { 1690 t = (low+high)/2; 1691 if (rp[t] > bcol) high = t; 1692 else low = t; 1693 } 1694 for (i=low; i<high; i++) { 1695 if (rp[i] > bcol) break; 1696 if (rp[i] == bcol) { 1697 bap = ap + bs2*i + bs*cidx + ridx; 1698 if (is == ADD_VALUES) *bap += value; 1699 else *bap = value; 1700 goto noinsert1; 1701 } 1702 } 1703 if (nonew == 1) goto noinsert1; 1704 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1705 if (nrow >= rmax) { 1706 /* there is no extra room in row, therefore enlarge */ 1707 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1708 MatScalar *new_a; 1709 1710 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1711 1712 /* Malloc new storage space */ 1713 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1714 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1715 new_j = (int*)(new_a + bs2*new_nz); 1716 new_i = new_j + new_nz; 1717 1718 /* copy over old data into new slots */ 1719 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1720 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1721 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1722 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1723 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1724 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1725 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1726 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1727 /* free up old matrix storage */ 1728 ierr = PetscFree(a->a);CHKERRQ(ierr); 1729 if (!a->singlemalloc) { 1730 ierr = PetscFree(a->i);CHKERRQ(ierr); 1731 ierr = PetscFree(a->j);CHKERRQ(ierr); 1732 } 1733 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1734 a->singlemalloc = PETSC_TRUE; 1735 1736 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1737 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1738 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1739 a->maxnz += bs2*CHUNKSIZE; 1740 a->reallocs++; 1741 a->nz++; 1742 } 1743 N = nrow++ - 1; 1744 /* shift up all the later entries in this row */ 1745 for (ii=N; ii>=i; ii--) { 1746 rp[ii+1] = rp[ii]; 1747 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1748 } 1749 if (N>=i) { 1750 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1751 } 1752 rp[i] = bcol; 1753 ap[bs2*i + bs*cidx + ridx] = value; 1754 noinsert1:; 1755 low = i; 1756 } 1757 ailen[brow] = nrow; 1758 } 1759 PetscFunctionReturn(0); 1760 } 1761 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1765 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1766 { 1767 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1768 Mat outA; 1769 int ierr; 1770 PetscTruth row_identity,col_identity; 1771 1772 PetscFunctionBegin; 1773 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1774 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1775 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1776 if (!row_identity || !col_identity) { 1777 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1778 } 1779 1780 outA = inA; 1781 inA->factor = FACTOR_LU; 1782 1783 if (!a->diag) { 1784 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1785 } 1786 1787 a->row = row; 1788 a->col = col; 1789 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1790 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1791 1792 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1793 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1794 PetscLogObjectParent(inA,a->icol); 1795 1796 /* 1797 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1798 for ILU(0) factorization with natural ordering 1799 */ 1800 if (a->bs < 8) { 1801 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1802 } else { 1803 if (!a->solve_work) { 1804 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1805 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1806 } 1807 } 1808 1809 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1810 1811 PetscFunctionReturn(0); 1812 } 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1815 int MatPrintHelp_SeqBAIJ(Mat A) 1816 { 1817 static PetscTruth called = PETSC_FALSE; 1818 MPI_Comm comm = A->comm; 1819 int ierr; 1820 1821 PetscFunctionBegin; 1822 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1823 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1824 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 EXTERN_C_BEGIN 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1831 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1832 { 1833 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1834 int i,nz,nbs; 1835 1836 PetscFunctionBegin; 1837 nz = baij->maxnz/baij->bs2; 1838 nbs = baij->nbs; 1839 for (i=0; i<nz; i++) { 1840 baij->j[i] = indices[i]; 1841 } 1842 baij->nz = nz; 1843 for (i=0; i<nbs; i++) { 1844 baij->ilen[i] = baij->imax[i]; 1845 } 1846 1847 PetscFunctionReturn(0); 1848 } 1849 EXTERN_C_END 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1853 /*@ 1854 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1855 in the matrix. 1856 1857 Input Parameters: 1858 + mat - the SeqBAIJ matrix 1859 - indices - the column indices 1860 1861 Level: advanced 1862 1863 Notes: 1864 This can be called if you have precomputed the nonzero structure of the 1865 matrix and want to provide it to the matrix object to improve the performance 1866 of the MatSetValues() operation. 1867 1868 You MUST have set the correct numbers of nonzeros per row in the call to 1869 MatCreateSeqBAIJ(). 1870 1871 MUST be called before any calls to MatSetValues(); 1872 1873 @*/ 1874 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1875 { 1876 int ierr,(*f)(Mat,int *); 1877 1878 PetscFunctionBegin; 1879 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1880 PetscValidPointer(indices,2); 1881 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1882 if (f) { 1883 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1884 } else { 1885 SETERRQ(1,"Wrong type of matrix to set column indices"); 1886 } 1887 PetscFunctionReturn(0); 1888 } 1889 1890 #undef __FUNCT__ 1891 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1892 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1893 { 1894 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1895 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1896 PetscReal atmp; 1897 PetscScalar *x,zero = 0.0; 1898 MatScalar *aa; 1899 int ncols,brow,krow,kcol; 1900 1901 PetscFunctionBegin; 1902 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1903 bs = a->bs; 1904 aa = a->a; 1905 ai = a->i; 1906 aj = a->j; 1907 mbs = a->mbs; 1908 1909 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1910 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1911 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1912 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1913 for (i=0; i<mbs; i++) { 1914 ncols = ai[1] - ai[0]; ai++; 1915 brow = bs*i; 1916 for (j=0; j<ncols; j++){ 1917 /* bcol = bs*(*aj); */ 1918 for (kcol=0; kcol<bs; kcol++){ 1919 for (krow=0; krow<bs; krow++){ 1920 atmp = PetscAbsScalar(*aa); aa++; 1921 row = brow + krow; /* row index */ 1922 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1923 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1924 } 1925 } 1926 aj++; 1927 } 1928 } 1929 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1935 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1936 { 1937 int ierr; 1938 1939 PetscFunctionBegin; 1940 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1946 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1947 { 1948 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1949 PetscFunctionBegin; 1950 *array = a->a; 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1956 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1957 { 1958 PetscFunctionBegin; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #include "petscblaslapack.h" 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1965 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1966 { 1967 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1968 int ierr,one=1,i,bs=y->bs,j,bs2; 1969 1970 PetscFunctionBegin; 1971 if (str == SAME_NONZERO_PATTERN) { 1972 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1973 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1974 if (y->xtoy && y->XtoY != X) { 1975 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1976 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1977 } 1978 if (!y->xtoy) { /* get xtoy */ 1979 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1980 y->XtoY = X; 1981 } 1982 bs2 = bs*bs; 1983 for (i=0; i<x->nz; i++) { 1984 j = 0; 1985 while (j < bs2){ 1986 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1987 j++; 1988 } 1989 } 1990 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1991 } else { 1992 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 /* -------------------------------------------------------------------*/ 1998 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1999 MatGetRow_SeqBAIJ, 2000 MatRestoreRow_SeqBAIJ, 2001 MatMult_SeqBAIJ_N, 2002 /* 4*/ MatMultAdd_SeqBAIJ_N, 2003 MatMultTranspose_SeqBAIJ, 2004 MatMultTransposeAdd_SeqBAIJ, 2005 MatSolve_SeqBAIJ_N, 2006 0, 2007 0, 2008 /*10*/ 0, 2009 MatLUFactor_SeqBAIJ, 2010 0, 2011 0, 2012 MatTranspose_SeqBAIJ, 2013 /*15*/ MatGetInfo_SeqBAIJ, 2014 MatEqual_SeqBAIJ, 2015 MatGetDiagonal_SeqBAIJ, 2016 MatDiagonalScale_SeqBAIJ, 2017 MatNorm_SeqBAIJ, 2018 /*20*/ 0, 2019 MatAssemblyEnd_SeqBAIJ, 2020 0, 2021 MatSetOption_SeqBAIJ, 2022 MatZeroEntries_SeqBAIJ, 2023 /*25*/ MatZeroRows_SeqBAIJ, 2024 MatLUFactorSymbolic_SeqBAIJ, 2025 MatLUFactorNumeric_SeqBAIJ_N, 2026 0, 2027 0, 2028 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2029 MatILUFactorSymbolic_SeqBAIJ, 2030 0, 2031 MatGetArray_SeqBAIJ, 2032 MatRestoreArray_SeqBAIJ, 2033 /*35*/ MatDuplicate_SeqBAIJ, 2034 0, 2035 0, 2036 MatILUFactor_SeqBAIJ, 2037 0, 2038 /*40*/ MatAXPY_SeqBAIJ, 2039 MatGetSubMatrices_SeqBAIJ, 2040 MatIncreaseOverlap_SeqBAIJ, 2041 MatGetValues_SeqBAIJ, 2042 0, 2043 /*45*/ MatPrintHelp_SeqBAIJ, 2044 MatScale_SeqBAIJ, 2045 0, 2046 0, 2047 0, 2048 /*50*/ MatGetBlockSize_SeqBAIJ, 2049 MatGetRowIJ_SeqBAIJ, 2050 MatRestoreRowIJ_SeqBAIJ, 2051 0, 2052 0, 2053 /*55*/ 0, 2054 0, 2055 0, 2056 0, 2057 MatSetValuesBlocked_SeqBAIJ, 2058 /*60*/ MatGetSubMatrix_SeqBAIJ, 2059 MatDestroy_SeqBAIJ, 2060 MatView_SeqBAIJ, 2061 MatGetPetscMaps_Petsc, 2062 0, 2063 /*65*/ 0, 2064 0, 2065 0, 2066 0, 2067 0, 2068 /*70*/ MatGetRowMax_SeqBAIJ, 2069 MatConvert_Basic, 2070 0, 2071 0, 2072 0, 2073 /*75*/ 0, 2074 0, 2075 0, 2076 0, 2077 0, 2078 /*80*/ 0, 2079 0, 2080 0, 2081 0, 2082 /*85*/ MatLoad_SeqBAIJ, 2083 0, 2084 0, 2085 0, 2086 0 2087 }; 2088 2089 EXTERN_C_BEGIN 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2092 int MatStoreValues_SeqBAIJ(Mat mat) 2093 { 2094 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2095 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 2096 int ierr; 2097 2098 PetscFunctionBegin; 2099 if (aij->nonew != 1) { 2100 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2101 } 2102 2103 /* allocate space for values if not already there */ 2104 if (!aij->saved_values) { 2105 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2106 } 2107 2108 /* copy values over */ 2109 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2110 PetscFunctionReturn(0); 2111 } 2112 EXTERN_C_END 2113 2114 EXTERN_C_BEGIN 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2117 int MatRetrieveValues_SeqBAIJ(Mat mat) 2118 { 2119 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2120 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 2121 2122 PetscFunctionBegin; 2123 if (aij->nonew != 1) { 2124 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2125 } 2126 if (!aij->saved_values) { 2127 SETERRQ(1,"Must call MatStoreValues(A);first"); 2128 } 2129 2130 /* copy values over */ 2131 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2132 PetscFunctionReturn(0); 2133 } 2134 EXTERN_C_END 2135 2136 EXTERN_C_BEGIN 2137 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2138 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2139 EXTERN_C_END 2140 2141 EXTERN_C_BEGIN 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2144 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 2145 { 2146 Mat_SeqBAIJ *b; 2147 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 2148 PetscTruth flg; 2149 2150 PetscFunctionBegin; 2151 2152 B->preallocated = PETSC_TRUE; 2153 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2154 if (nnz && newbs != bs) { 2155 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 2156 } 2157 bs = newbs; 2158 2159 mbs = B->m/bs; 2160 nbs = B->n/bs; 2161 bs2 = bs*bs; 2162 2163 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2164 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2165 } 2166 2167 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2168 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2169 if (nnz) { 2170 for (i=0; i<mbs; i++) { 2171 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2172 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2173 } 2174 } 2175 2176 b = (Mat_SeqBAIJ*)B->data; 2177 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2178 B->ops->solve = MatSolve_SeqBAIJ_Update; 2179 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2180 if (!flg) { 2181 switch (bs) { 2182 case 1: 2183 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2184 B->ops->mult = MatMult_SeqBAIJ_1; 2185 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2186 break; 2187 case 2: 2188 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2189 B->ops->mult = MatMult_SeqBAIJ_2; 2190 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2191 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2192 break; 2193 case 3: 2194 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2195 B->ops->mult = MatMult_SeqBAIJ_3; 2196 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2197 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2198 break; 2199 case 4: 2200 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2201 B->ops->mult = MatMult_SeqBAIJ_4; 2202 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2203 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2204 break; 2205 case 5: 2206 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2207 B->ops->mult = MatMult_SeqBAIJ_5; 2208 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2209 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2210 break; 2211 case 6: 2212 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2213 B->ops->mult = MatMult_SeqBAIJ_6; 2214 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2215 break; 2216 case 7: 2217 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2218 B->ops->mult = MatMult_SeqBAIJ_7; 2219 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2220 break; 2221 default: 2222 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2223 B->ops->mult = MatMult_SeqBAIJ_N; 2224 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2225 break; 2226 } 2227 } 2228 b->bs = bs; 2229 b->mbs = mbs; 2230 b->nbs = nbs; 2231 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2232 if (!nnz) { 2233 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2234 else if (nz <= 0) nz = 1; 2235 for (i=0; i<mbs; i++) b->imax[i] = nz; 2236 nz = nz*mbs; 2237 } else { 2238 nz = 0; 2239 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2240 } 2241 2242 /* allocate the matrix space */ 2243 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2244 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2245 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2246 b->j = (int*)(b->a + nz*bs2); 2247 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2248 b->i = b->j + nz; 2249 b->singlemalloc = PETSC_TRUE; 2250 2251 b->i[0] = 0; 2252 for (i=1; i<mbs+1; i++) { 2253 b->i[i] = b->i[i-1] + b->imax[i-1]; 2254 } 2255 2256 /* b->ilen will count nonzeros in each block row so far. */ 2257 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2258 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2259 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2260 2261 b->bs = bs; 2262 b->bs2 = bs2; 2263 b->mbs = mbs; 2264 b->nz = 0; 2265 b->maxnz = nz*bs2; 2266 B->info.nz_unneeded = (PetscReal)b->maxnz; 2267 PetscFunctionReturn(0); 2268 } 2269 EXTERN_C_END 2270 2271 /*MC 2272 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2273 block sparse compressed row format. 2274 2275 Options Database Keys: 2276 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2277 2278 Level: beginner 2279 2280 .seealso: MatCreateSeqBAIJ 2281 M*/ 2282 2283 EXTERN_C_BEGIN 2284 #undef __FUNCT__ 2285 #define __FUNCT__ "MatCreate_SeqBAIJ" 2286 int MatCreate_SeqBAIJ(Mat B) 2287 { 2288 int ierr,size; 2289 Mat_SeqBAIJ *b; 2290 2291 PetscFunctionBegin; 2292 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2293 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2294 2295 B->m = B->M = PetscMax(B->m,B->M); 2296 B->n = B->N = PetscMax(B->n,B->N); 2297 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2298 B->data = (void*)b; 2299 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2300 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2301 B->factor = 0; 2302 B->lupivotthreshold = 1.0; 2303 B->mapping = 0; 2304 b->row = 0; 2305 b->col = 0; 2306 b->icol = 0; 2307 b->reallocs = 0; 2308 b->saved_values = 0; 2309 #if defined(PETSC_USE_MAT_SINGLE) 2310 b->setvalueslen = 0; 2311 b->setvaluescopy = PETSC_NULL; 2312 #endif 2313 2314 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2315 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2316 2317 b->sorted = PETSC_FALSE; 2318 b->roworiented = PETSC_TRUE; 2319 b->nonew = 0; 2320 b->diag = 0; 2321 b->solve_work = 0; 2322 b->mult_work = 0; 2323 B->spptr = 0; 2324 B->info.nz_unneeded = (PetscReal)b->maxnz; 2325 b->keepzeroedrows = PETSC_FALSE; 2326 b->xtoy = 0; 2327 b->XtoY = 0; 2328 2329 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2330 "MatStoreValues_SeqBAIJ", 2331 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2332 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2333 "MatRetrieveValues_SeqBAIJ", 2334 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2335 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2336 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2337 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2338 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2339 "MatConvert_SeqBAIJ_SeqAIJ", 2340 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2342 "MatConvert_SeqBAIJ_SeqSBAIJ", 2343 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2344 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2345 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2346 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2347 PetscFunctionReturn(0); 2348 } 2349 EXTERN_C_END 2350 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2353 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2354 { 2355 Mat C; 2356 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2357 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 2358 2359 PetscFunctionBegin; 2360 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2361 2362 *B = 0; 2363 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2364 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2365 c = (Mat_SeqBAIJ*)C->data; 2366 2367 C->M = A->M; 2368 C->N = A->N; 2369 c->bs = a->bs; 2370 c->bs2 = a->bs2; 2371 c->mbs = a->mbs; 2372 c->nbs = a->nbs; 2373 2374 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 2375 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 2376 for (i=0; i<mbs; i++) { 2377 c->imax[i] = a->imax[i]; 2378 c->ilen[i] = a->ilen[i]; 2379 } 2380 2381 /* allocate the matrix space */ 2382 c->singlemalloc = PETSC_TRUE; 2383 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 2384 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2385 c->j = (int*)(c->a + nz*bs2); 2386 c->i = c->j + nz; 2387 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 2388 if (mbs > 0) { 2389 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 2390 if (cpvalues == MAT_COPY_VALUES) { 2391 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2392 } else { 2393 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2394 } 2395 } 2396 2397 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2398 c->sorted = a->sorted; 2399 c->roworiented = a->roworiented; 2400 c->nonew = a->nonew; 2401 2402 if (a->diag) { 2403 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 2404 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 2405 for (i=0; i<mbs; i++) { 2406 c->diag[i] = a->diag[i]; 2407 } 2408 } else c->diag = 0; 2409 c->nz = a->nz; 2410 c->maxnz = a->maxnz; 2411 c->solve_work = 0; 2412 c->mult_work = 0; 2413 C->preallocated = PETSC_TRUE; 2414 C->assembled = PETSC_TRUE; 2415 *B = C; 2416 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatLoad_SeqBAIJ" 2422 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2423 { 2424 Mat_SeqBAIJ *a; 2425 Mat B; 2426 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 2427 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2428 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2429 int *masked,nmask,tmp,bs2,ishift; 2430 PetscScalar *aa; 2431 MPI_Comm comm = ((PetscObject)viewer)->comm; 2432 2433 PetscFunctionBegin; 2434 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2435 bs2 = bs*bs; 2436 2437 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2438 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2439 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2440 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2441 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2442 M = header[1]; N = header[2]; nz = header[3]; 2443 2444 if (header[3] < 0) { 2445 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2446 } 2447 2448 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2449 2450 /* 2451 This code adds extra rows to make sure the number of rows is 2452 divisible by the blocksize 2453 */ 2454 mbs = M/bs; 2455 extra_rows = bs - M + bs*(mbs); 2456 if (extra_rows == bs) extra_rows = 0; 2457 else mbs++; 2458 if (extra_rows) { 2459 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2460 } 2461 2462 /* read in row lengths */ 2463 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2464 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2465 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2466 2467 /* read in column indices */ 2468 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 2469 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2470 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2471 2472 /* loop over row lengths determining block row lengths */ 2473 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 2474 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 2475 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 2476 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 2477 masked = mask + mbs; 2478 rowcount = 0; nzcount = 0; 2479 for (i=0; i<mbs; i++) { 2480 nmask = 0; 2481 for (j=0; j<bs; j++) { 2482 kmax = rowlengths[rowcount]; 2483 for (k=0; k<kmax; k++) { 2484 tmp = jj[nzcount++]/bs; 2485 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2486 } 2487 rowcount++; 2488 } 2489 browlengths[i] += nmask; 2490 /* zero out the mask elements we set */ 2491 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2492 } 2493 2494 /* create our matrix */ 2495 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2496 ierr = MatSetType(B,type);CHKERRQ(ierr); 2497 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2498 a = (Mat_SeqBAIJ*)B->data; 2499 2500 /* set matrix "i" values */ 2501 a->i[0] = 0; 2502 for (i=1; i<= mbs; i++) { 2503 a->i[i] = a->i[i-1] + browlengths[i-1]; 2504 a->ilen[i-1] = browlengths[i-1]; 2505 } 2506 a->nz = 0; 2507 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2508 2509 /* read in nonzero values */ 2510 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2511 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2512 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2513 2514 /* set "a" and "j" values into matrix */ 2515 nzcount = 0; jcount = 0; 2516 for (i=0; i<mbs; i++) { 2517 nzcountb = nzcount; 2518 nmask = 0; 2519 for (j=0; j<bs; j++) { 2520 kmax = rowlengths[i*bs+j]; 2521 for (k=0; k<kmax; k++) { 2522 tmp = jj[nzcount++]/bs; 2523 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2524 } 2525 } 2526 /* sort the masked values */ 2527 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2528 2529 /* set "j" values into matrix */ 2530 maskcount = 1; 2531 for (j=0; j<nmask; j++) { 2532 a->j[jcount++] = masked[j]; 2533 mask[masked[j]] = maskcount++; 2534 } 2535 /* set "a" values into matrix */ 2536 ishift = bs2*a->i[i]; 2537 for (j=0; j<bs; j++) { 2538 kmax = rowlengths[i*bs+j]; 2539 for (k=0; k<kmax; k++) { 2540 tmp = jj[nzcountb]/bs ; 2541 block = mask[tmp] - 1; 2542 point = jj[nzcountb] - bs*tmp; 2543 idx = ishift + bs2*block + j + bs*point; 2544 a->a[idx] = (MatScalar)aa[nzcountb++]; 2545 } 2546 } 2547 /* zero out the mask elements we set */ 2548 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2549 } 2550 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2551 2552 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2553 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2554 ierr = PetscFree(aa);CHKERRQ(ierr); 2555 ierr = PetscFree(jj);CHKERRQ(ierr); 2556 ierr = PetscFree(mask);CHKERRQ(ierr); 2557 2558 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 ierr = MatView_Private(B);CHKERRQ(ierr); 2561 2562 *A = B; 2563 PetscFunctionReturn(0); 2564 } 2565 2566 #undef __FUNCT__ 2567 #define __FUNCT__ "MatCreateSeqBAIJ" 2568 /*@C 2569 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2570 compressed row) format. For good matrix assembly performance the 2571 user should preallocate the matrix storage by setting the parameter nz 2572 (or the array nnz). By setting these parameters accurately, performance 2573 during matrix assembly can be increased by more than a factor of 50. 2574 2575 Collective on MPI_Comm 2576 2577 Input Parameters: 2578 + comm - MPI communicator, set to PETSC_COMM_SELF 2579 . bs - size of block 2580 . m - number of rows 2581 . n - number of columns 2582 . nz - number of nonzero blocks per block row (same for all rows) 2583 - nnz - array containing the number of nonzero blocks in the various block rows 2584 (possibly different for each block row) or PETSC_NULL 2585 2586 Output Parameter: 2587 . A - the matrix 2588 2589 Options Database Keys: 2590 . -mat_no_unroll - uses code that does not unroll the loops in the 2591 block calculations (much slower) 2592 . -mat_block_size - size of the blocks to use 2593 2594 Level: intermediate 2595 2596 Notes: 2597 A nonzero block is any block that as 1 or more nonzeros in it 2598 2599 The block AIJ format is fully compatible with standard Fortran 77 2600 storage. That is, the stored row and column indices can begin at 2601 either one (as in Fortran) or zero. See the users' manual for details. 2602 2603 Specify the preallocated storage with either nz or nnz (not both). 2604 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2605 allocation. For additional details, see the users manual chapter on 2606 matrices. 2607 2608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2609 @*/ 2610 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2611 { 2612 int ierr; 2613 2614 PetscFunctionBegin; 2615 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2616 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2617 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2618 PetscFunctionReturn(0); 2619 } 2620 2621 #undef __FUNCT__ 2622 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2623 /*@C 2624 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2625 per row in the matrix. For good matrix assembly performance the 2626 user should preallocate the matrix storage by setting the parameter nz 2627 (or the array nnz). By setting these parameters accurately, performance 2628 during matrix assembly can be increased by more than a factor of 50. 2629 2630 Collective on MPI_Comm 2631 2632 Input Parameters: 2633 + A - the matrix 2634 . bs - size of block 2635 . nz - number of block nonzeros per block row (same for all rows) 2636 - nnz - array containing the number of block nonzeros in the various block rows 2637 (possibly different for each block row) or PETSC_NULL 2638 2639 Options Database Keys: 2640 . -mat_no_unroll - uses code that does not unroll the loops in the 2641 block calculations (much slower) 2642 . -mat_block_size - size of the blocks to use 2643 2644 Level: intermediate 2645 2646 Notes: 2647 The block AIJ format is fully compatible with standard Fortran 77 2648 storage. That is, the stored row and column indices can begin at 2649 either one (as in Fortran) or zero. See the users' manual for details. 2650 2651 Specify the preallocated storage with either nz or nnz (not both). 2652 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2653 allocation. For additional details, see the users manual chapter on 2654 matrices. 2655 2656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2657 @*/ 2658 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2659 { 2660 int ierr,(*f)(Mat,int,int,const int[]); 2661 2662 PetscFunctionBegin; 2663 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2664 if (f) { 2665 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2666 } 2667 PetscFunctionReturn(0); 2668 } 2669 2670