1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 int MatInvertBlockDiagonal_SeqBAIJ(Mat A) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 int *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr; 17 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 18 19 PetscFunctionBegin; 20 if (a->idiagvalid) PetscFunctionReturn(0); 21 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 22 diag_offset = a->diag; 23 if (!a->idiag) { 24 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 25 } 26 diag = a->idiag; 27 mdiag = a->idiag+bs*bs*mbs; 28 /* factor and invert each block */ 29 switch (a->bs){ 30 case 2: 31 for (i=0; i<mbs; i++) { 32 odiag = v + 4*diag_offset[i]; 33 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 34 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 35 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 36 diag += 4; 37 mdiag += 4; 38 } 39 break; 40 case 3: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 9*diag_offset[i]; 43 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 44 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 45 diag[8] = odiag[8]; 46 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 47 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 48 mdiag[8] = odiag[8]; 49 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 50 diag += 9; 51 mdiag += 9; 52 } 53 break; 54 case 4: 55 for (i=0; i<mbs; i++) { 56 odiag = v + 16*diag_offset[i]; 57 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 58 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 59 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 60 diag += 16; 61 mdiag += 16; 62 } 63 break; 64 case 5: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 25*diag_offset[i]; 67 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 68 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 70 diag += 25; 71 mdiag += 25; 72 } 73 break; 74 default: 75 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %d",a->bs); 76 } 77 a->idiagvalid = PETSC_TRUE; 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 83 int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 84 { 85 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 86 PetscScalar *x,x1,x2,s1,s2; 87 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 88 int ierr,m = a->mbs,i,i2,nz,idx; 89 const int *diag,*ai = a->i,*aj = a->j,*vi; 90 91 PetscFunctionBegin; 92 its = its*lits; 93 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 94 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 95 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 96 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 97 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 98 99 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 100 101 diag = a->diag; 102 idiag = a->idiag; 103 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 104 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 105 106 if (flag & SOR_ZERO_INITIAL_GUESS) { 107 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 108 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 109 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 110 i2 = 2; 111 idiag += 4; 112 for (i=1; i<m; i++) { 113 v = aa + 4*ai[i]; 114 vi = aj + ai[i]; 115 nz = diag[i] - ai[i]; 116 s1 = b[i2]; s2 = b[i2+1]; 117 while (nz--) { 118 idx = 2*(*vi++); 119 x1 = x[idx]; x2 = x[1+idx]; 120 s1 -= v[0]*x1 + v[2]*x2; 121 s2 -= v[1]*x1 + v[3]*x2; 122 v += 4; 123 } 124 x[i2] = idiag[0]*s1 + idiag[2]*s2; 125 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 126 idiag += 4; 127 i2 += 2; 128 } 129 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 130 PetscLogFlops(4*(a->nz)); 131 } 132 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 133 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 134 i2 = 0; 135 mdiag = a->idiag+4*a->mbs; 136 for (i=0; i<m; i++) { 137 x1 = x[i2]; x2 = x[i2+1]; 138 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 139 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 140 mdiag += 4; 141 i2 += 2; 142 } 143 PetscLogFlops(6*m); 144 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 145 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 146 } 147 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 148 idiag = a->idiag+4*a->mbs - 4; 149 i2 = 2*m - 2; 150 x1 = x[i2]; x2 = x[i2+1]; 151 x[i2] = idiag[0]*x1 + idiag[2]*x2; 152 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 153 idiag -= 4; 154 i2 -= 2; 155 for (i=m-2; i>=0; i--) { 156 v = aa + 4*(diag[i]+1); 157 vi = aj + diag[i] + 1; 158 nz = ai[i+1] - diag[i] - 1; 159 s1 = x[i2]; s2 = x[i2+1]; 160 while (nz--) { 161 idx = 2*(*vi++); 162 x1 = x[idx]; x2 = x[1+idx]; 163 s1 -= v[0]*x1 + v[2]*x2; 164 s2 -= v[1]*x1 + v[3]*x2; 165 v += 4; 166 } 167 x[i2] = idiag[0]*s1 + idiag[2]*s2; 168 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 169 idiag -= 4; 170 i2 -= 2; 171 } 172 PetscLogFlops(4*(a->nz)); 173 } 174 } else { 175 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 176 } 177 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 178 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 184 int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 185 { 186 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 187 PetscScalar *x,x1,x2,x3,s1,s2,s3; 188 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 189 int ierr,m = a->mbs,i,i2,nz,idx; 190 const int *diag,*ai = a->i,*aj = a->j,*vi; 191 192 PetscFunctionBegin; 193 its = its*lits; 194 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 195 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 196 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 197 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 198 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 199 200 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 201 202 diag = a->diag; 203 idiag = a->idiag; 204 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 205 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 206 207 if (flag & SOR_ZERO_INITIAL_GUESS) { 208 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 209 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 210 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 211 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 212 i2 = 3; 213 idiag += 9; 214 for (i=1; i<m; i++) { 215 v = aa + 9*ai[i]; 216 vi = aj + ai[i]; 217 nz = diag[i] - ai[i]; 218 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 219 while (nz--) { 220 idx = 3*(*vi++); 221 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 222 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 223 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 224 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 225 v += 9; 226 } 227 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 228 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 229 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 230 idiag += 9; 231 i2 += 3; 232 } 233 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 234 PetscLogFlops(9*(a->nz)); 235 } 236 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 237 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 238 i2 = 0; 239 mdiag = a->idiag+9*a->mbs; 240 for (i=0; i<m; i++) { 241 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 242 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 243 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 244 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 245 mdiag += 9; 246 i2 += 3; 247 } 248 PetscLogFlops(15*m); 249 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 250 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 251 } 252 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 253 idiag = a->idiag+9*a->mbs - 9; 254 i2 = 3*m - 3; 255 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 256 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 257 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 258 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 259 idiag -= 9; 260 i2 -= 3; 261 for (i=m-2; i>=0; i--) { 262 v = aa + 9*(diag[i]+1); 263 vi = aj + diag[i] + 1; 264 nz = ai[i+1] - diag[i] - 1; 265 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 266 while (nz--) { 267 idx = 3*(*vi++); 268 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 269 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 270 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 271 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 272 v += 9; 273 } 274 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 275 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 276 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 277 idiag -= 9; 278 i2 -= 3; 279 } 280 PetscLogFlops(9*(a->nz)); 281 } 282 } else { 283 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 284 } 285 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 286 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 292 int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 293 { 294 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 295 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 296 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 297 int ierr,m = a->mbs,i,i2,nz,idx; 298 const int *diag,*ai = a->i,*aj = a->j,*vi; 299 300 PetscFunctionBegin; 301 its = its*lits; 302 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 303 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 304 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 305 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 306 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 307 308 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 309 310 diag = a->diag; 311 idiag = a->idiag; 312 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 313 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 314 315 if (flag & SOR_ZERO_INITIAL_GUESS) { 316 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 317 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 318 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 319 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 320 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 321 i2 = 4; 322 idiag += 16; 323 for (i=1; i<m; i++) { 324 v = aa + 16*ai[i]; 325 vi = aj + ai[i]; 326 nz = diag[i] - ai[i]; 327 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 328 while (nz--) { 329 idx = 4*(*vi++); 330 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 331 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 332 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 333 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 334 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 335 v += 16; 336 } 337 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 338 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 339 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 340 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 341 idiag += 16; 342 i2 += 4; 343 } 344 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 345 PetscLogFlops(16*(a->nz)); 346 } 347 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 348 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 349 i2 = 0; 350 mdiag = a->idiag+16*a->mbs; 351 for (i=0; i<m; i++) { 352 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 353 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 354 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 355 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 356 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 357 mdiag += 16; 358 i2 += 4; 359 } 360 PetscLogFlops(28*m); 361 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 362 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 363 } 364 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 365 idiag = a->idiag+16*a->mbs - 16; 366 i2 = 4*m - 4; 367 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 368 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 369 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 370 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 371 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 372 idiag -= 16; 373 i2 -= 4; 374 for (i=m-2; i>=0; i--) { 375 v = aa + 16*(diag[i]+1); 376 vi = aj + diag[i] + 1; 377 nz = ai[i+1] - diag[i] - 1; 378 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 379 while (nz--) { 380 idx = 4*(*vi++); 381 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 382 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 383 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 384 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 385 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 386 v += 16; 387 } 388 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 389 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 390 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 391 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 392 idiag -= 16; 393 i2 -= 4; 394 } 395 PetscLogFlops(16*(a->nz)); 396 } 397 } else { 398 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 399 } 400 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 401 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 407 int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 408 { 409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 410 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 411 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 412 int ierr,m = a->mbs,i,i2,nz,idx; 413 const int *diag,*ai = a->i,*aj = a->j,*vi; 414 415 PetscFunctionBegin; 416 its = its*lits; 417 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 418 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 419 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 420 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 421 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 422 423 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 424 425 diag = a->diag; 426 idiag = a->idiag; 427 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 428 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 429 430 if (flag & SOR_ZERO_INITIAL_GUESS) { 431 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 432 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 433 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 434 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 435 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 436 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 437 i2 = 5; 438 idiag += 25; 439 for (i=1; i<m; i++) { 440 v = aa + 25*ai[i]; 441 vi = aj + ai[i]; 442 nz = diag[i] - ai[i]; 443 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 444 while (nz--) { 445 idx = 5*(*vi++); 446 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 447 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 448 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 449 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 450 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 451 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 452 v += 25; 453 } 454 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 455 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 456 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 457 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 458 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 459 idiag += 25; 460 i2 += 5; 461 } 462 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 463 PetscLogFlops(25*(a->nz)); 464 } 465 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 466 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 467 i2 = 0; 468 mdiag = a->idiag+25*a->mbs; 469 for (i=0; i<m; i++) { 470 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 471 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 472 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 473 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 474 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 475 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 476 mdiag += 25; 477 i2 += 5; 478 } 479 PetscLogFlops(45*m); 480 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 481 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 482 } 483 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 484 idiag = a->idiag+25*a->mbs - 25; 485 i2 = 5*m - 5; 486 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 487 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 488 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 489 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 490 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 491 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 492 idiag -= 25; 493 i2 -= 5; 494 for (i=m-2; i>=0; i--) { 495 v = aa + 25*(diag[i]+1); 496 vi = aj + diag[i] + 1; 497 nz = ai[i+1] - diag[i] - 1; 498 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 499 while (nz--) { 500 idx = 5*(*vi++); 501 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 502 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 503 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 504 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 505 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 506 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 507 v += 25; 508 } 509 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 510 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 511 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 512 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 513 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 514 idiag -= 25; 515 i2 -= 5; 516 } 517 PetscLogFlops(25*(a->nz)); 518 } 519 } else { 520 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 521 } 522 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 523 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 /* 528 Special version for Fun3d sequential benchmark 529 */ 530 #if defined(PETSC_HAVE_FORTRAN_CAPS) 531 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 532 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 533 #define matsetvaluesblocked4_ matsetvaluesblocked4 534 #endif 535 536 EXTERN_C_BEGIN 537 #undef __FUNCT__ 538 #define __FUNCT__ "matsetvaluesblocked4_" 539 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 540 { 541 Mat A = *AA; 542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 543 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 544 int *ai=a->i,*ailen=a->ilen; 545 int *aj=a->j,stepval; 546 const PetscScalar *value = v; 547 MatScalar *ap,*aa = a->a,*bap; 548 549 PetscFunctionBegin; 550 stepval = (n-1)*4; 551 for (k=0; k<m; k++) { /* loop over added rows */ 552 row = im[k]; 553 rp = aj + ai[row]; 554 ap = aa + 16*ai[row]; 555 nrow = ailen[row]; 556 low = 0; 557 for (l=0; l<n; l++) { /* loop over added columns */ 558 col = in[l]; 559 value = v + k*(stepval+4)*4 + l*4; 560 low = 0; high = nrow; 561 while (high-low > 7) { 562 t = (low+high)/2; 563 if (rp[t] > col) high = t; 564 else low = t; 565 } 566 for (i=low; i<high; i++) { 567 if (rp[i] > col) break; 568 if (rp[i] == col) { 569 bap = ap + 16*i; 570 for (ii=0; ii<4; ii++,value+=stepval) { 571 for (jj=ii; jj<16; jj+=4) { 572 bap[jj] += *value++; 573 } 574 } 575 goto noinsert2; 576 } 577 } 578 N = nrow++ - 1; 579 /* shift up all the later entries in this row */ 580 for (ii=N; ii>=i; ii--) { 581 rp[ii+1] = rp[ii]; 582 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 583 } 584 if (N >= i) { 585 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 586 } 587 rp[i] = col; 588 bap = ap + 16*i; 589 for (ii=0; ii<4; ii++,value+=stepval) { 590 for (jj=ii; jj<16; jj+=4) { 591 bap[jj] = *value++; 592 } 593 } 594 noinsert2:; 595 low = i; 596 } 597 ailen[row] = nrow; 598 } 599 } 600 EXTERN_C_END 601 602 #if defined(PETSC_HAVE_FORTRAN_CAPS) 603 #define matsetvalues4_ MATSETVALUES4 604 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 605 #define matsetvalues4_ matsetvalues4 606 #endif 607 608 EXTERN_C_BEGIN 609 #undef __FUNCT__ 610 #define __FUNCT__ "MatSetValues4_" 611 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 612 { 613 Mat A = *AA; 614 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 615 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 616 int *ai=a->i,*ailen=a->ilen; 617 int *aj=a->j,brow,bcol; 618 int ridx,cidx; 619 MatScalar *ap,value,*aa=a->a,*bap; 620 621 PetscFunctionBegin; 622 for (k=0; k<m; k++) { /* loop over added rows */ 623 row = im[k]; brow = row/4; 624 rp = aj + ai[brow]; 625 ap = aa + 16*ai[brow]; 626 nrow = ailen[brow]; 627 low = 0; 628 for (l=0; l<n; l++) { /* loop over added columns */ 629 col = in[l]; bcol = col/4; 630 ridx = row % 4; cidx = col % 4; 631 value = v[l + k*n]; 632 low = 0; high = nrow; 633 while (high-low > 7) { 634 t = (low+high)/2; 635 if (rp[t] > bcol) high = t; 636 else low = t; 637 } 638 for (i=low; i<high; i++) { 639 if (rp[i] > bcol) break; 640 if (rp[i] == bcol) { 641 bap = ap + 16*i + 4*cidx + ridx; 642 *bap += value; 643 goto noinsert1; 644 } 645 } 646 N = nrow++ - 1; 647 /* shift up all the later entries in this row */ 648 for (ii=N; ii>=i; ii--) { 649 rp[ii+1] = rp[ii]; 650 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 651 } 652 if (N>=i) { 653 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 654 } 655 rp[i] = bcol; 656 ap[16*i + 4*cidx + ridx] = value; 657 noinsert1:; 658 low = i; 659 } 660 ailen[brow] = nrow; 661 } 662 } 663 EXTERN_C_END 664 665 /* UGLY, ugly, ugly 666 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 667 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 668 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 669 converts the entries into single precision and then calls ..._MatScalar() to put them 670 into the single precision data structures. 671 */ 672 #if defined(PETSC_USE_MAT_SINGLE) 673 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 674 #else 675 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 676 #endif 677 678 #define CHUNKSIZE 10 679 680 /* 681 Checks for missing diagonals 682 */ 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 685 int MatMissingDiagonal_SeqBAIJ(Mat A) 686 { 687 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 688 int *diag,*jj = a->j,i,ierr; 689 690 PetscFunctionBegin; 691 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 692 diag = a->diag; 693 for (i=0; i<a->mbs; i++) { 694 if (jj[diag[i]] != i) { 695 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %d",i); 696 } 697 } 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 703 int MatMarkDiagonal_SeqBAIJ(Mat A) 704 { 705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 706 int i,j,*diag,m = a->mbs,ierr; 707 708 PetscFunctionBegin; 709 if (a->diag) PetscFunctionReturn(0); 710 711 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 712 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 713 for (i=0; i<m; i++) { 714 diag[i] = a->i[i+1]; 715 for (j=a->i[i]; j<a->i[i+1]; j++) { 716 if (a->j[j] == i) { 717 diag[i] = j; 718 break; 719 } 720 } 721 } 722 a->diag = diag; 723 PetscFunctionReturn(0); 724 } 725 726 727 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 731 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 732 { 733 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 734 int ierr,n = a->mbs,i; 735 736 PetscFunctionBegin; 737 *nn = n; 738 if (!ia) PetscFunctionReturn(0); 739 if (symmetric) { 740 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 741 } else if (oshift == 1) { 742 /* temporarily add 1 to i and j indices */ 743 int nz = a->i[n]; 744 for (i=0; i<nz; i++) a->j[i]++; 745 for (i=0; i<n+1; i++) a->i[i]++; 746 *ia = a->i; *ja = a->j; 747 } else { 748 *ia = a->i; *ja = a->j; 749 } 750 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 756 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 757 { 758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 759 int i,n = a->mbs,ierr; 760 761 PetscFunctionBegin; 762 if (!ia) PetscFunctionReturn(0); 763 if (symmetric) { 764 ierr = PetscFree(*ia);CHKERRQ(ierr); 765 ierr = PetscFree(*ja);CHKERRQ(ierr); 766 } else if (oshift == 1) { 767 int nz = a->i[n]-1; 768 for (i=0; i<nz; i++) a->j[i]--; 769 for (i=0; i<n+1; i++) a->i[i]--; 770 } 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 776 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 777 { 778 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 779 780 PetscFunctionBegin; 781 *bs = baij->bs; 782 PetscFunctionReturn(0); 783 } 784 785 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatDestroy_SeqBAIJ" 788 int MatDestroy_SeqBAIJ(Mat A) 789 { 790 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 791 int ierr; 792 793 PetscFunctionBegin; 794 #if defined(PETSC_USE_LOG) 795 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 796 #endif 797 ierr = PetscFree(a->a);CHKERRQ(ierr); 798 if (!a->singlemalloc) { 799 ierr = PetscFree(a->i);CHKERRQ(ierr); 800 ierr = PetscFree(a->j);CHKERRQ(ierr); 801 } 802 if (a->row) { 803 ierr = ISDestroy(a->row);CHKERRQ(ierr); 804 } 805 if (a->col) { 806 ierr = ISDestroy(a->col);CHKERRQ(ierr); 807 } 808 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 809 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 810 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 811 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 812 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 813 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 814 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 815 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 816 #if defined(PETSC_USE_MAT_SINGLE) 817 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 818 #endif 819 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 820 821 ierr = PetscFree(a);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatSetOption_SeqBAIJ" 827 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 830 831 PetscFunctionBegin; 832 switch (op) { 833 case MAT_ROW_ORIENTED: 834 a->roworiented = PETSC_TRUE; 835 break; 836 case MAT_COLUMN_ORIENTED: 837 a->roworiented = PETSC_FALSE; 838 break; 839 case MAT_COLUMNS_SORTED: 840 a->sorted = PETSC_TRUE; 841 break; 842 case MAT_COLUMNS_UNSORTED: 843 a->sorted = PETSC_FALSE; 844 break; 845 case MAT_KEEP_ZEROED_ROWS: 846 a->keepzeroedrows = PETSC_TRUE; 847 break; 848 case MAT_NO_NEW_NONZERO_LOCATIONS: 849 a->nonew = 1; 850 break; 851 case MAT_NEW_NONZERO_LOCATION_ERR: 852 a->nonew = -1; 853 break; 854 case MAT_NEW_NONZERO_ALLOCATION_ERR: 855 a->nonew = -2; 856 break; 857 case MAT_YES_NEW_NONZERO_LOCATIONS: 858 a->nonew = 0; 859 break; 860 case MAT_ROWS_SORTED: 861 case MAT_ROWS_UNSORTED: 862 case MAT_YES_NEW_DIAGONALS: 863 case MAT_IGNORE_OFF_PROC_ENTRIES: 864 case MAT_USE_HASH_TABLE: 865 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 866 break; 867 case MAT_NO_NEW_DIAGONALS: 868 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 869 case MAT_SYMMETRIC: 870 case MAT_STRUCTURALLY_SYMMETRIC: 871 case MAT_NOT_SYMMETRIC: 872 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 873 case MAT_HERMITIAN: 874 case MAT_NOT_HERMITIAN: 875 case MAT_SYMMETRY_ETERNAL: 876 case MAT_NOT_SYMMETRY_ETERNAL: 877 break; 878 default: 879 SETERRQ(PETSC_ERR_SUP,"unknown option"); 880 } 881 PetscFunctionReturn(0); 882 } 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatGetRow_SeqBAIJ" 886 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 887 { 888 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 889 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 890 MatScalar *aa,*aa_i; 891 PetscScalar *v_i; 892 893 PetscFunctionBegin; 894 bs = a->bs; 895 ai = a->i; 896 aj = a->j; 897 aa = a->a; 898 bs2 = a->bs2; 899 900 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 901 902 bn = row/bs; /* Block number */ 903 bp = row % bs; /* Block Position */ 904 M = ai[bn+1] - ai[bn]; 905 *nz = bs*M; 906 907 if (v) { 908 *v = 0; 909 if (*nz) { 910 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 911 for (i=0; i<M; i++) { /* for each block in the block row */ 912 v_i = *v + i*bs; 913 aa_i = aa + bs2*(ai[bn] + i); 914 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 915 } 916 } 917 } 918 919 if (idx) { 920 *idx = 0; 921 if (*nz) { 922 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 923 for (i=0; i<M; i++) { /* for each block in the block row */ 924 idx_i = *idx + i*bs; 925 itmp = bs*aj[ai[bn] + i]; 926 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 927 } 928 } 929 } 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 935 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 936 { 937 int ierr; 938 939 PetscFunctionBegin; 940 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 941 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatTranspose_SeqBAIJ" 947 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 948 { 949 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 950 Mat C; 951 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 952 int *rows,*cols,bs2=a->bs2; 953 PetscScalar *array; 954 955 PetscFunctionBegin; 956 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 957 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 958 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 959 960 #if defined(PETSC_USE_MAT_SINGLE) 961 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 962 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 963 #else 964 array = a->a; 965 #endif 966 967 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 968 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 969 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 970 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 971 ierr = PetscFree(col);CHKERRQ(ierr); 972 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 973 cols = rows + bs; 974 for (i=0; i<mbs; i++) { 975 cols[0] = i*bs; 976 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 977 len = ai[i+1] - ai[i]; 978 for (j=0; j<len; j++) { 979 rows[0] = (*aj++)*bs; 980 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 981 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 982 array += bs2; 983 } 984 } 985 ierr = PetscFree(rows);CHKERRQ(ierr); 986 #if defined(PETSC_USE_MAT_SINGLE) 987 ierr = PetscFree(array); 988 #endif 989 990 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 992 993 if (B) { 994 *B = C; 995 } else { 996 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1003 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1004 { 1005 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1006 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1007 PetscScalar *aa; 1008 FILE *file; 1009 1010 PetscFunctionBegin; 1011 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1012 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 1013 col_lens[0] = MAT_FILE_COOKIE; 1014 1015 col_lens[1] = A->m; 1016 col_lens[2] = A->n; 1017 col_lens[3] = a->nz*bs2; 1018 1019 /* store lengths of each row and write (including header) to file */ 1020 count = 0; 1021 for (i=0; i<a->mbs; i++) { 1022 for (j=0; j<bs; j++) { 1023 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1024 } 1025 } 1026 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 1027 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1028 1029 /* store column indices (zero start index) */ 1030 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 1031 count = 0; 1032 for (i=0; i<a->mbs; i++) { 1033 for (j=0; j<bs; j++) { 1034 for (k=a->i[i]; k<a->i[i+1]; k++) { 1035 for (l=0; l<bs; l++) { 1036 jj[count++] = bs*a->j[k] + l; 1037 } 1038 } 1039 } 1040 } 1041 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 1042 ierr = PetscFree(jj);CHKERRQ(ierr); 1043 1044 /* store nonzero values */ 1045 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1046 count = 0; 1047 for (i=0; i<a->mbs; i++) { 1048 for (j=0; j<bs; j++) { 1049 for (k=a->i[i]; k<a->i[i+1]; k++) { 1050 for (l=0; l<bs; l++) { 1051 aa[count++] = a->a[bs2*k + l*bs + j]; 1052 } 1053 } 1054 } 1055 } 1056 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 1057 ierr = PetscFree(aa);CHKERRQ(ierr); 1058 1059 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1060 if (file) { 1061 fprintf(file,"-matload_block_size %d\n",a->bs); 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1068 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1069 { 1070 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1071 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 1072 PetscViewerFormat format; 1073 1074 PetscFunctionBegin; 1075 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1076 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1077 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1078 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1079 Mat aij; 1080 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1081 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1082 ierr = MatDestroy(aij);CHKERRQ(ierr); 1083 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1086 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1087 for (i=0; i<a->mbs; i++) { 1088 for (j=0; j<bs; j++) { 1089 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1090 for (k=a->i[i]; k<a->i[i+1]; k++) { 1091 for (l=0; l<bs; l++) { 1092 #if defined(PETSC_USE_COMPLEX) 1093 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1094 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 1095 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1096 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1097 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 1098 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1099 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1100 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1101 } 1102 #else 1103 if (a->a[bs2*k + l*bs + j] != 0.0) { 1104 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1105 } 1106 #endif 1107 } 1108 } 1109 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1110 } 1111 } 1112 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1113 } else { 1114 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1115 for (i=0; i<a->mbs; i++) { 1116 for (j=0; j<bs; j++) { 1117 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1118 for (k=a->i[i]; k<a->i[i+1]; k++) { 1119 for (l=0; l<bs; l++) { 1120 #if defined(PETSC_USE_COMPLEX) 1121 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1122 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 1123 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1124 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1125 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 1126 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1127 } else { 1128 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1129 } 1130 #else 1131 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1132 #endif 1133 } 1134 } 1135 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1136 } 1137 } 1138 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1139 } 1140 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1146 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1147 { 1148 Mat A = (Mat) Aa; 1149 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1150 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1151 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1152 MatScalar *aa; 1153 PetscViewer viewer; 1154 1155 PetscFunctionBegin; 1156 1157 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1158 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1159 1160 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1161 1162 /* loop over matrix elements drawing boxes */ 1163 color = PETSC_DRAW_BLUE; 1164 for (i=0,row=0; i<mbs; i++,row+=bs) { 1165 for (j=a->i[i]; j<a->i[i+1]; j++) { 1166 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1167 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1168 aa = a->a + j*bs2; 1169 for (k=0; k<bs; k++) { 1170 for (l=0; l<bs; l++) { 1171 if (PetscRealPart(*aa++) >= 0.) continue; 1172 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1173 } 1174 } 1175 } 1176 } 1177 color = PETSC_DRAW_CYAN; 1178 for (i=0,row=0; i<mbs; i++,row+=bs) { 1179 for (j=a->i[i]; j<a->i[i+1]; j++) { 1180 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1181 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1182 aa = a->a + j*bs2; 1183 for (k=0; k<bs; k++) { 1184 for (l=0; l<bs; l++) { 1185 if (PetscRealPart(*aa++) != 0.) continue; 1186 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1187 } 1188 } 1189 } 1190 } 1191 1192 color = PETSC_DRAW_RED; 1193 for (i=0,row=0; i<mbs; i++,row+=bs) { 1194 for (j=a->i[i]; j<a->i[i+1]; j++) { 1195 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1196 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1197 aa = a->a + j*bs2; 1198 for (k=0; k<bs; k++) { 1199 for (l=0; l<bs; l++) { 1200 if (PetscRealPart(*aa++) <= 0.) continue; 1201 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1202 } 1203 } 1204 } 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1211 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1212 { 1213 int ierr; 1214 PetscReal xl,yl,xr,yr,w,h; 1215 PetscDraw draw; 1216 PetscTruth isnull; 1217 1218 PetscFunctionBegin; 1219 1220 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1221 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1222 1223 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1224 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1225 xr += w; yr += h; xl = -w; yl = -h; 1226 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1227 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1228 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatView_SeqBAIJ" 1234 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1235 { 1236 int ierr; 1237 PetscTruth iascii,isbinary,isdraw; 1238 1239 PetscFunctionBegin; 1240 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1241 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1242 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1243 if (iascii){ 1244 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1245 } else if (isbinary) { 1246 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1247 } else if (isdraw) { 1248 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1249 } else { 1250 Mat B; 1251 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1252 ierr = MatView(B,viewer);CHKERRQ(ierr); 1253 ierr = MatDestroy(B);CHKERRQ(ierr); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1261 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 1262 { 1263 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1264 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1265 int *ai = a->i,*ailen = a->ilen; 1266 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1267 MatScalar *ap,*aa = a->a,zero = 0.0; 1268 1269 PetscFunctionBegin; 1270 for (k=0; k<m; k++) { /* loop over rows */ 1271 row = im[k]; brow = row/bs; 1272 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1273 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 1274 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1275 nrow = ailen[brow]; 1276 for (l=0; l<n; l++) { /* loop over columns */ 1277 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1278 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 1279 col = in[l] ; 1280 bcol = col/bs; 1281 cidx = col%bs; 1282 ridx = row%bs; 1283 high = nrow; 1284 low = 0; /* assume unsorted */ 1285 while (high-low > 5) { 1286 t = (low+high)/2; 1287 if (rp[t] > bcol) high = t; 1288 else low = t; 1289 } 1290 for (i=low; i<high; i++) { 1291 if (rp[i] > bcol) break; 1292 if (rp[i] == bcol) { 1293 *v++ = ap[bs2*i+bs*cidx+ridx]; 1294 goto finished; 1295 } 1296 } 1297 *v++ = zero; 1298 finished:; 1299 } 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #if defined(PETSC_USE_MAT_SINGLE) 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1307 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 1308 { 1309 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1310 int ierr,i,N = m*n*b->bs2; 1311 MatScalar *vsingle; 1312 1313 PetscFunctionBegin; 1314 if (N > b->setvalueslen) { 1315 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1316 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1317 b->setvalueslen = N; 1318 } 1319 vsingle = b->setvaluescopy; 1320 for (i=0; i<N; i++) { 1321 vsingle[i] = v[i]; 1322 } 1323 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 #endif 1327 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1331 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 1332 { 1333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1334 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1335 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1336 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 1337 PetscTruth roworiented=a->roworiented; 1338 const MatScalar *value = v; 1339 MatScalar *ap,*aa = a->a,*bap; 1340 1341 PetscFunctionBegin; 1342 if (roworiented) { 1343 stepval = (n-1)*bs; 1344 } else { 1345 stepval = (m-1)*bs; 1346 } 1347 for (k=0; k<m; k++) { /* loop over added rows */ 1348 row = im[k]; 1349 if (row < 0) continue; 1350 #if defined(PETSC_USE_BOPT_g) 1351 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 1352 #endif 1353 rp = aj + ai[row]; 1354 ap = aa + bs2*ai[row]; 1355 rmax = imax[row]; 1356 nrow = ailen[row]; 1357 low = 0; 1358 for (l=0; l<n; l++) { /* loop over added columns */ 1359 if (in[l] < 0) continue; 1360 #if defined(PETSC_USE_BOPT_g) 1361 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 1362 #endif 1363 col = in[l]; 1364 if (roworiented) { 1365 value = v + k*(stepval+bs)*bs + l*bs; 1366 } else { 1367 value = v + l*(stepval+bs)*bs + k*bs; 1368 } 1369 if (!sorted) low = 0; high = nrow; 1370 while (high-low > 7) { 1371 t = (low+high)/2; 1372 if (rp[t] > col) high = t; 1373 else low = t; 1374 } 1375 for (i=low; i<high; i++) { 1376 if (rp[i] > col) break; 1377 if (rp[i] == col) { 1378 bap = ap + bs2*i; 1379 if (roworiented) { 1380 if (is == ADD_VALUES) { 1381 for (ii=0; ii<bs; ii++,value+=stepval) { 1382 for (jj=ii; jj<bs2; jj+=bs) { 1383 bap[jj] += *value++; 1384 } 1385 } 1386 } else { 1387 for (ii=0; ii<bs; ii++,value+=stepval) { 1388 for (jj=ii; jj<bs2; jj+=bs) { 1389 bap[jj] = *value++; 1390 } 1391 } 1392 } 1393 } else { 1394 if (is == ADD_VALUES) { 1395 for (ii=0; ii<bs; ii++,value+=stepval) { 1396 for (jj=0; jj<bs; jj++) { 1397 *bap++ += *value++; 1398 } 1399 } 1400 } else { 1401 for (ii=0; ii<bs; ii++,value+=stepval) { 1402 for (jj=0; jj<bs; jj++) { 1403 *bap++ = *value++; 1404 } 1405 } 1406 } 1407 } 1408 goto noinsert2; 1409 } 1410 } 1411 if (nonew == 1) goto noinsert2; 1412 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1413 if (nrow >= rmax) { 1414 /* there is no extra room in row, therefore enlarge */ 1415 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1416 MatScalar *new_a; 1417 1418 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1419 1420 /* malloc new storage space */ 1421 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1422 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1423 new_j = (int*)(new_a + bs2*new_nz); 1424 new_i = new_j + new_nz; 1425 1426 /* copy over old data into new slots */ 1427 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1428 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1429 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 1430 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1431 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 1432 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1433 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1434 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1435 /* free up old matrix storage */ 1436 ierr = PetscFree(a->a);CHKERRQ(ierr); 1437 if (!a->singlemalloc) { 1438 ierr = PetscFree(a->i);CHKERRQ(ierr); 1439 ierr = PetscFree(a->j);CHKERRQ(ierr); 1440 } 1441 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1442 a->singlemalloc = PETSC_TRUE; 1443 1444 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1445 rmax = imax[row] = imax[row] + CHUNKSIZE; 1446 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1447 a->maxnz += bs2*CHUNKSIZE; 1448 a->reallocs++; 1449 a->nz++; 1450 } 1451 N = nrow++ - 1; 1452 /* shift up all the later entries in this row */ 1453 for (ii=N; ii>=i; ii--) { 1454 rp[ii+1] = rp[ii]; 1455 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1456 } 1457 if (N >= i) { 1458 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1459 } 1460 rp[i] = col; 1461 bap = ap + bs2*i; 1462 if (roworiented) { 1463 for (ii=0; ii<bs; ii++,value+=stepval) { 1464 for (jj=ii; jj<bs2; jj+=bs) { 1465 bap[jj] = *value++; 1466 } 1467 } 1468 } else { 1469 for (ii=0; ii<bs; ii++,value+=stepval) { 1470 for (jj=0; jj<bs; jj++) { 1471 *bap++ = *value++; 1472 } 1473 } 1474 } 1475 noinsert2:; 1476 low = i; 1477 } 1478 ailen[row] = nrow; 1479 } 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1485 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1486 { 1487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1488 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1489 int m = A->m,*ip,N,*ailen = a->ilen; 1490 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 1491 MatScalar *aa = a->a,*ap; 1492 1493 PetscFunctionBegin; 1494 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1495 1496 if (m) rmax = ailen[0]; 1497 for (i=1; i<mbs; i++) { 1498 /* move each row back by the amount of empty slots (fshift) before it*/ 1499 fshift += imax[i-1] - ailen[i-1]; 1500 rmax = PetscMax(rmax,ailen[i]); 1501 if (fshift) { 1502 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1503 N = ailen[i]; 1504 for (j=0; j<N; j++) { 1505 ip[j-fshift] = ip[j]; 1506 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1507 } 1508 } 1509 ai[i] = ai[i-1] + ailen[i-1]; 1510 } 1511 if (mbs) { 1512 fshift += imax[mbs-1] - ailen[mbs-1]; 1513 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1514 } 1515 /* reset ilen and imax for each row */ 1516 for (i=0; i<mbs; i++) { 1517 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1518 } 1519 a->nz = ai[mbs]; 1520 1521 /* diagonals may have moved, so kill the diagonal pointers */ 1522 a->idiagvalid = PETSC_FALSE; 1523 if (fshift && a->diag) { 1524 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1525 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1526 a->diag = 0; 1527 } 1528 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1529 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1530 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1531 a->reallocs = 0; 1532 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1533 1534 PetscFunctionReturn(0); 1535 } 1536 1537 1538 1539 /* 1540 This function returns an array of flags which indicate the locations of contiguous 1541 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1542 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1543 Assume: sizes should be long enough to hold all the values. 1544 */ 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1547 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1548 { 1549 int i,j,k,row; 1550 PetscTruth flg; 1551 1552 PetscFunctionBegin; 1553 for (i=0,j=0; i<n; j++) { 1554 row = idx[i]; 1555 if (row%bs!=0) { /* Not the begining of a block */ 1556 sizes[j] = 1; 1557 i++; 1558 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1559 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1560 i++; 1561 } else { /* Begining of the block, so check if the complete block exists */ 1562 flg = PETSC_TRUE; 1563 for (k=1; k<bs; k++) { 1564 if (row+k != idx[i+k]) { /* break in the block */ 1565 flg = PETSC_FALSE; 1566 break; 1567 } 1568 } 1569 if (flg == PETSC_TRUE) { /* No break in the bs */ 1570 sizes[j] = bs; 1571 i+= bs; 1572 } else { 1573 sizes[j] = 1; 1574 i++; 1575 } 1576 } 1577 } 1578 *bs_max = j; 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1584 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1585 { 1586 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1587 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1588 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1589 PetscScalar zero = 0.0; 1590 MatScalar *aa; 1591 1592 PetscFunctionBegin; 1593 /* Make a copy of the IS and sort it */ 1594 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1595 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1596 1597 /* allocate memory for rows,sizes */ 1598 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1599 sizes = rows + is_n; 1600 1601 /* copy IS values to rows, and sort them */ 1602 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1603 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1604 if (baij->keepzeroedrows) { 1605 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1606 bs_max = is_n; 1607 } else { 1608 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1609 } 1610 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1611 1612 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1613 row = rows[j]; 1614 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1615 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1616 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1617 if (sizes[i] == bs && !baij->keepzeroedrows) { 1618 if (diag) { 1619 if (baij->ilen[row/bs] > 0) { 1620 baij->ilen[row/bs] = 1; 1621 baij->j[baij->i[row/bs]] = row/bs; 1622 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1623 } 1624 /* Now insert all the diagonal values for this bs */ 1625 for (k=0; k<bs; k++) { 1626 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1627 } 1628 } else { /* (!diag) */ 1629 baij->ilen[row/bs] = 0; 1630 } /* end (!diag) */ 1631 } else { /* (sizes[i] != bs) */ 1632 #if defined (PETSC_USE_DEBUG) 1633 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1634 #endif 1635 for (k=0; k<count; k++) { 1636 aa[0] = zero; 1637 aa += bs; 1638 } 1639 if (diag) { 1640 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1641 } 1642 } 1643 } 1644 1645 ierr = PetscFree(rows);CHKERRQ(ierr); 1646 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1652 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1653 { 1654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1655 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1656 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1657 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1658 int ridx,cidx,bs2=a->bs2,ierr; 1659 PetscTruth roworiented=a->roworiented; 1660 MatScalar *ap,value,*aa=a->a,*bap; 1661 1662 PetscFunctionBegin; 1663 for (k=0; k<m; k++) { /* loop over added rows */ 1664 row = im[k]; brow = row/bs; 1665 if (row < 0) continue; 1666 #if defined(PETSC_USE_BOPT_g) 1667 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1668 #endif 1669 rp = aj + ai[brow]; 1670 ap = aa + bs2*ai[brow]; 1671 rmax = imax[brow]; 1672 nrow = ailen[brow]; 1673 low = 0; 1674 for (l=0; l<n; l++) { /* loop over added columns */ 1675 if (in[l] < 0) continue; 1676 #if defined(PETSC_USE_BOPT_g) 1677 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1678 #endif 1679 col = in[l]; bcol = col/bs; 1680 ridx = row % bs; cidx = col % bs; 1681 if (roworiented) { 1682 value = v[l + k*n]; 1683 } else { 1684 value = v[k + l*m]; 1685 } 1686 if (!sorted) low = 0; high = nrow; 1687 while (high-low > 7) { 1688 t = (low+high)/2; 1689 if (rp[t] > bcol) high = t; 1690 else low = t; 1691 } 1692 for (i=low; i<high; i++) { 1693 if (rp[i] > bcol) break; 1694 if (rp[i] == bcol) { 1695 bap = ap + bs2*i + bs*cidx + ridx; 1696 if (is == ADD_VALUES) *bap += value; 1697 else *bap = value; 1698 goto noinsert1; 1699 } 1700 } 1701 if (nonew == 1) goto noinsert1; 1702 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1703 if (nrow >= rmax) { 1704 /* there is no extra room in row, therefore enlarge */ 1705 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1706 MatScalar *new_a; 1707 1708 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1709 1710 /* Malloc new storage space */ 1711 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1712 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1713 new_j = (int*)(new_a + bs2*new_nz); 1714 new_i = new_j + new_nz; 1715 1716 /* copy over old data into new slots */ 1717 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1718 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1719 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1720 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1721 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1722 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1723 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1724 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1725 /* free up old matrix storage */ 1726 ierr = PetscFree(a->a);CHKERRQ(ierr); 1727 if (!a->singlemalloc) { 1728 ierr = PetscFree(a->i);CHKERRQ(ierr); 1729 ierr = PetscFree(a->j);CHKERRQ(ierr); 1730 } 1731 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1732 a->singlemalloc = PETSC_TRUE; 1733 1734 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1735 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1736 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1737 a->maxnz += bs2*CHUNKSIZE; 1738 a->reallocs++; 1739 a->nz++; 1740 } 1741 N = nrow++ - 1; 1742 /* shift up all the later entries in this row */ 1743 for (ii=N; ii>=i; ii--) { 1744 rp[ii+1] = rp[ii]; 1745 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1746 } 1747 if (N>=i) { 1748 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1749 } 1750 rp[i] = bcol; 1751 ap[bs2*i + bs*cidx + ridx] = value; 1752 noinsert1:; 1753 low = i; 1754 } 1755 ailen[brow] = nrow; 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1763 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1764 { 1765 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1766 Mat outA; 1767 int ierr; 1768 PetscTruth row_identity,col_identity; 1769 1770 PetscFunctionBegin; 1771 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1772 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1773 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1774 if (!row_identity || !col_identity) { 1775 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1776 } 1777 1778 outA = inA; 1779 inA->factor = FACTOR_LU; 1780 1781 if (!a->diag) { 1782 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1783 } 1784 1785 a->row = row; 1786 a->col = col; 1787 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1788 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1789 1790 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1791 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1792 PetscLogObjectParent(inA,a->icol); 1793 1794 /* 1795 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1796 for ILU(0) factorization with natural ordering 1797 */ 1798 if (a->bs < 8) { 1799 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1800 } else { 1801 if (!a->solve_work) { 1802 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1803 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1804 } 1805 } 1806 1807 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1808 1809 PetscFunctionReturn(0); 1810 } 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1813 int MatPrintHelp_SeqBAIJ(Mat A) 1814 { 1815 static PetscTruth called = PETSC_FALSE; 1816 MPI_Comm comm = A->comm; 1817 int ierr; 1818 1819 PetscFunctionBegin; 1820 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1821 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1822 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 EXTERN_C_BEGIN 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1829 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1830 { 1831 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1832 int i,nz,nbs; 1833 1834 PetscFunctionBegin; 1835 nz = baij->maxnz/baij->bs2; 1836 nbs = baij->nbs; 1837 for (i=0; i<nz; i++) { 1838 baij->j[i] = indices[i]; 1839 } 1840 baij->nz = nz; 1841 for (i=0; i<nbs; i++) { 1842 baij->ilen[i] = baij->imax[i]; 1843 } 1844 1845 PetscFunctionReturn(0); 1846 } 1847 EXTERN_C_END 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1851 /*@ 1852 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1853 in the matrix. 1854 1855 Input Parameters: 1856 + mat - the SeqBAIJ matrix 1857 - indices - the column indices 1858 1859 Level: advanced 1860 1861 Notes: 1862 This can be called if you have precomputed the nonzero structure of the 1863 matrix and want to provide it to the matrix object to improve the performance 1864 of the MatSetValues() operation. 1865 1866 You MUST have set the correct numbers of nonzeros per row in the call to 1867 MatCreateSeqBAIJ(). 1868 1869 MUST be called before any calls to MatSetValues(); 1870 1871 @*/ 1872 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1873 { 1874 int ierr,(*f)(Mat,int *); 1875 1876 PetscFunctionBegin; 1877 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1878 PetscValidPointer(indices,2); 1879 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1880 if (f) { 1881 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1882 } else { 1883 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1884 } 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1890 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1891 { 1892 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1893 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1894 PetscReal atmp; 1895 PetscScalar *x,zero = 0.0; 1896 MatScalar *aa; 1897 int ncols,brow,krow,kcol; 1898 1899 PetscFunctionBegin; 1900 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1901 bs = a->bs; 1902 aa = a->a; 1903 ai = a->i; 1904 aj = a->j; 1905 mbs = a->mbs; 1906 1907 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1908 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1909 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1910 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1911 for (i=0; i<mbs; i++) { 1912 ncols = ai[1] - ai[0]; ai++; 1913 brow = bs*i; 1914 for (j=0; j<ncols; j++){ 1915 /* bcol = bs*(*aj); */ 1916 for (kcol=0; kcol<bs; kcol++){ 1917 for (krow=0; krow<bs; krow++){ 1918 atmp = PetscAbsScalar(*aa); aa++; 1919 row = brow + krow; /* row index */ 1920 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1921 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1922 } 1923 } 1924 aj++; 1925 } 1926 } 1927 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1933 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1934 { 1935 int ierr; 1936 1937 PetscFunctionBegin; 1938 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1944 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1945 { 1946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1947 PetscFunctionBegin; 1948 *array = a->a; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1954 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1955 { 1956 PetscFunctionBegin; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #include "petscblaslapack.h" 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1963 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1964 { 1965 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1966 int ierr,one=1,i,bs=y->bs,j,bs2; 1967 1968 PetscFunctionBegin; 1969 if (str == SAME_NONZERO_PATTERN) { 1970 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1971 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1972 if (y->xtoy && y->XtoY != X) { 1973 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1974 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1975 } 1976 if (!y->xtoy) { /* get xtoy */ 1977 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1978 y->XtoY = X; 1979 } 1980 bs2 = bs*bs; 1981 for (i=0; i<x->nz; i++) { 1982 j = 0; 1983 while (j < bs2){ 1984 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1985 j++; 1986 } 1987 } 1988 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1989 } else { 1990 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1991 } 1992 PetscFunctionReturn(0); 1993 } 1994 1995 /* -------------------------------------------------------------------*/ 1996 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1997 MatGetRow_SeqBAIJ, 1998 MatRestoreRow_SeqBAIJ, 1999 MatMult_SeqBAIJ_N, 2000 /* 4*/ MatMultAdd_SeqBAIJ_N, 2001 MatMultTranspose_SeqBAIJ, 2002 MatMultTransposeAdd_SeqBAIJ, 2003 MatSolve_SeqBAIJ_N, 2004 0, 2005 0, 2006 /*10*/ 0, 2007 MatLUFactor_SeqBAIJ, 2008 0, 2009 0, 2010 MatTranspose_SeqBAIJ, 2011 /*15*/ MatGetInfo_SeqBAIJ, 2012 MatEqual_SeqBAIJ, 2013 MatGetDiagonal_SeqBAIJ, 2014 MatDiagonalScale_SeqBAIJ, 2015 MatNorm_SeqBAIJ, 2016 /*20*/ 0, 2017 MatAssemblyEnd_SeqBAIJ, 2018 0, 2019 MatSetOption_SeqBAIJ, 2020 MatZeroEntries_SeqBAIJ, 2021 /*25*/ MatZeroRows_SeqBAIJ, 2022 MatLUFactorSymbolic_SeqBAIJ, 2023 MatLUFactorNumeric_SeqBAIJ_N, 2024 0, 2025 0, 2026 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2027 MatILUFactorSymbolic_SeqBAIJ, 2028 0, 2029 MatGetArray_SeqBAIJ, 2030 MatRestoreArray_SeqBAIJ, 2031 /*35*/ MatDuplicate_SeqBAIJ, 2032 0, 2033 0, 2034 MatILUFactor_SeqBAIJ, 2035 0, 2036 /*40*/ MatAXPY_SeqBAIJ, 2037 MatGetSubMatrices_SeqBAIJ, 2038 MatIncreaseOverlap_SeqBAIJ, 2039 MatGetValues_SeqBAIJ, 2040 0, 2041 /*45*/ MatPrintHelp_SeqBAIJ, 2042 MatScale_SeqBAIJ, 2043 0, 2044 0, 2045 0, 2046 /*50*/ MatGetBlockSize_SeqBAIJ, 2047 MatGetRowIJ_SeqBAIJ, 2048 MatRestoreRowIJ_SeqBAIJ, 2049 0, 2050 0, 2051 /*55*/ 0, 2052 0, 2053 0, 2054 0, 2055 MatSetValuesBlocked_SeqBAIJ, 2056 /*60*/ MatGetSubMatrix_SeqBAIJ, 2057 MatDestroy_SeqBAIJ, 2058 MatView_SeqBAIJ, 2059 MatGetPetscMaps_Petsc, 2060 0, 2061 /*65*/ 0, 2062 0, 2063 0, 2064 0, 2065 0, 2066 /*70*/ MatGetRowMax_SeqBAIJ, 2067 MatConvert_Basic, 2068 0, 2069 0, 2070 0, 2071 /*75*/ 0, 2072 0, 2073 0, 2074 0, 2075 0, 2076 /*80*/ 0, 2077 0, 2078 0, 2079 0, 2080 /*85*/ MatLoad_SeqBAIJ, 2081 0, 2082 0, 2083 0, 2084 0 2085 }; 2086 2087 EXTERN_C_BEGIN 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2090 int MatStoreValues_SeqBAIJ(Mat mat) 2091 { 2092 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2093 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 2094 int ierr; 2095 2096 PetscFunctionBegin; 2097 if (aij->nonew != 1) { 2098 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2099 } 2100 2101 /* allocate space for values if not already there */ 2102 if (!aij->saved_values) { 2103 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2104 } 2105 2106 /* copy values over */ 2107 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2108 PetscFunctionReturn(0); 2109 } 2110 EXTERN_C_END 2111 2112 EXTERN_C_BEGIN 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2115 int MatRetrieveValues_SeqBAIJ(Mat mat) 2116 { 2117 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2118 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 2119 2120 PetscFunctionBegin; 2121 if (aij->nonew != 1) { 2122 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2123 } 2124 if (!aij->saved_values) { 2125 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2126 } 2127 2128 /* copy values over */ 2129 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 EXTERN_C_END 2133 2134 EXTERN_C_BEGIN 2135 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2136 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2137 EXTERN_C_END 2138 2139 EXTERN_C_BEGIN 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2142 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 2143 { 2144 Mat_SeqBAIJ *b; 2145 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 2146 PetscTruth flg; 2147 2148 PetscFunctionBegin; 2149 2150 B->preallocated = PETSC_TRUE; 2151 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2152 if (nnz && newbs != bs) { 2153 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2154 } 2155 bs = newbs; 2156 2157 mbs = B->m/bs; 2158 nbs = B->n/bs; 2159 bs2 = bs*bs; 2160 2161 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2162 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2163 } 2164 2165 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2166 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2167 if (nnz) { 2168 for (i=0; i<mbs; i++) { 2169 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2170 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2171 } 2172 } 2173 2174 b = (Mat_SeqBAIJ*)B->data; 2175 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2176 B->ops->solve = MatSolve_SeqBAIJ_Update; 2177 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2178 if (!flg) { 2179 switch (bs) { 2180 case 1: 2181 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2182 B->ops->mult = MatMult_SeqBAIJ_1; 2183 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2184 break; 2185 case 2: 2186 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2187 B->ops->mult = MatMult_SeqBAIJ_2; 2188 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2189 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2190 break; 2191 case 3: 2192 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2193 B->ops->mult = MatMult_SeqBAIJ_3; 2194 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2195 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2196 break; 2197 case 4: 2198 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2199 B->ops->mult = MatMult_SeqBAIJ_4; 2200 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2201 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2202 break; 2203 case 5: 2204 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2205 B->ops->mult = MatMult_SeqBAIJ_5; 2206 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2207 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2208 break; 2209 case 6: 2210 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2211 B->ops->mult = MatMult_SeqBAIJ_6; 2212 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2213 break; 2214 case 7: 2215 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2216 B->ops->mult = MatMult_SeqBAIJ_7; 2217 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2218 break; 2219 default: 2220 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2221 B->ops->mult = MatMult_SeqBAIJ_N; 2222 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2223 break; 2224 } 2225 } 2226 b->bs = bs; 2227 b->mbs = mbs; 2228 b->nbs = nbs; 2229 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2230 if (!nnz) { 2231 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2232 else if (nz <= 0) nz = 1; 2233 for (i=0; i<mbs; i++) b->imax[i] = nz; 2234 nz = nz*mbs; 2235 } else { 2236 nz = 0; 2237 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2238 } 2239 2240 /* allocate the matrix space */ 2241 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2242 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2243 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2244 b->j = (int*)(b->a + nz*bs2); 2245 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2246 b->i = b->j + nz; 2247 b->singlemalloc = PETSC_TRUE; 2248 2249 b->i[0] = 0; 2250 for (i=1; i<mbs+1; i++) { 2251 b->i[i] = b->i[i-1] + b->imax[i-1]; 2252 } 2253 2254 /* b->ilen will count nonzeros in each block row so far. */ 2255 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2256 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2257 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2258 2259 b->bs = bs; 2260 b->bs2 = bs2; 2261 b->mbs = mbs; 2262 b->nz = 0; 2263 b->maxnz = nz*bs2; 2264 B->info.nz_unneeded = (PetscReal)b->maxnz; 2265 PetscFunctionReturn(0); 2266 } 2267 EXTERN_C_END 2268 2269 /*MC 2270 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2271 block sparse compressed row format. 2272 2273 Options Database Keys: 2274 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2275 2276 Level: beginner 2277 2278 .seealso: MatCreateSeqBAIJ 2279 M*/ 2280 2281 EXTERN_C_BEGIN 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatCreate_SeqBAIJ" 2284 int MatCreate_SeqBAIJ(Mat B) 2285 { 2286 int ierr,size; 2287 Mat_SeqBAIJ *b; 2288 2289 PetscFunctionBegin; 2290 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2291 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2292 2293 B->m = B->M = PetscMax(B->m,B->M); 2294 B->n = B->N = PetscMax(B->n,B->N); 2295 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2296 B->data = (void*)b; 2297 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2298 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2299 B->factor = 0; 2300 B->lupivotthreshold = 1.0; 2301 B->mapping = 0; 2302 b->row = 0; 2303 b->col = 0; 2304 b->icol = 0; 2305 b->reallocs = 0; 2306 b->saved_values = 0; 2307 #if defined(PETSC_USE_MAT_SINGLE) 2308 b->setvalueslen = 0; 2309 b->setvaluescopy = PETSC_NULL; 2310 #endif 2311 2312 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2313 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2314 2315 b->sorted = PETSC_FALSE; 2316 b->roworiented = PETSC_TRUE; 2317 b->nonew = 0; 2318 b->diag = 0; 2319 b->solve_work = 0; 2320 b->mult_work = 0; 2321 B->spptr = 0; 2322 B->info.nz_unneeded = (PetscReal)b->maxnz; 2323 b->keepzeroedrows = PETSC_FALSE; 2324 b->xtoy = 0; 2325 b->XtoY = 0; 2326 2327 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2328 "MatStoreValues_SeqBAIJ", 2329 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2330 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2331 "MatRetrieveValues_SeqBAIJ", 2332 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2333 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2334 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2335 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2336 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2337 "MatConvert_SeqBAIJ_SeqAIJ", 2338 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2340 "MatConvert_SeqBAIJ_SeqSBAIJ", 2341 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2343 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2344 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2345 PetscFunctionReturn(0); 2346 } 2347 EXTERN_C_END 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2351 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2352 { 2353 Mat C; 2354 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2355 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 2356 2357 PetscFunctionBegin; 2358 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2359 2360 *B = 0; 2361 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2362 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2363 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2364 c = (Mat_SeqBAIJ*)C->data; 2365 2366 C->M = A->M; 2367 C->N = A->N; 2368 c->bs = a->bs; 2369 c->bs2 = a->bs2; 2370 c->mbs = a->mbs; 2371 c->nbs = a->nbs; 2372 2373 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 2374 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 2375 for (i=0; i<mbs; i++) { 2376 c->imax[i] = a->imax[i]; 2377 c->ilen[i] = a->ilen[i]; 2378 } 2379 2380 /* allocate the matrix space */ 2381 c->singlemalloc = PETSC_TRUE; 2382 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 2383 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2384 c->j = (int*)(c->a + nz*bs2); 2385 c->i = c->j + nz; 2386 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 2387 if (mbs > 0) { 2388 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 2389 if (cpvalues == MAT_COPY_VALUES) { 2390 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2391 } else { 2392 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2393 } 2394 } 2395 2396 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2397 c->sorted = a->sorted; 2398 c->roworiented = a->roworiented; 2399 c->nonew = a->nonew; 2400 2401 if (a->diag) { 2402 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 2403 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 2404 for (i=0; i<mbs; i++) { 2405 c->diag[i] = a->diag[i]; 2406 } 2407 } else c->diag = 0; 2408 c->nz = a->nz; 2409 c->maxnz = a->maxnz; 2410 c->solve_work = 0; 2411 c->mult_work = 0; 2412 C->preallocated = PETSC_TRUE; 2413 C->assembled = PETSC_TRUE; 2414 *B = C; 2415 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 #undef __FUNCT__ 2420 #define __FUNCT__ "MatLoad_SeqBAIJ" 2421 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2422 { 2423 Mat_SeqBAIJ *a; 2424 Mat B; 2425 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 2426 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2427 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2428 int *masked,nmask,tmp,bs2,ishift; 2429 PetscScalar *aa; 2430 MPI_Comm comm = ((PetscObject)viewer)->comm; 2431 2432 PetscFunctionBegin; 2433 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2434 bs2 = bs*bs; 2435 2436 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2437 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2438 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2439 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2440 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2441 M = header[1]; N = header[2]; nz = header[3]; 2442 2443 if (header[3] < 0) { 2444 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2445 } 2446 2447 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2448 2449 /* 2450 This code adds extra rows to make sure the number of rows is 2451 divisible by the blocksize 2452 */ 2453 mbs = M/bs; 2454 extra_rows = bs - M + bs*(mbs); 2455 if (extra_rows == bs) extra_rows = 0; 2456 else mbs++; 2457 if (extra_rows) { 2458 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2459 } 2460 2461 /* read in row lengths */ 2462 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2463 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2464 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2465 2466 /* read in column indices */ 2467 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 2468 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2469 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2470 2471 /* loop over row lengths determining block row lengths */ 2472 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 2473 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 2474 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 2475 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 2476 masked = mask + mbs; 2477 rowcount = 0; nzcount = 0; 2478 for (i=0; i<mbs; i++) { 2479 nmask = 0; 2480 for (j=0; j<bs; j++) { 2481 kmax = rowlengths[rowcount]; 2482 for (k=0; k<kmax; k++) { 2483 tmp = jj[nzcount++]/bs; 2484 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2485 } 2486 rowcount++; 2487 } 2488 browlengths[i] += nmask; 2489 /* zero out the mask elements we set */ 2490 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2491 } 2492 2493 /* create our matrix */ 2494 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2495 ierr = MatSetType(B,type);CHKERRQ(ierr); 2496 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2497 a = (Mat_SeqBAIJ*)B->data; 2498 2499 /* set matrix "i" values */ 2500 a->i[0] = 0; 2501 for (i=1; i<= mbs; i++) { 2502 a->i[i] = a->i[i-1] + browlengths[i-1]; 2503 a->ilen[i-1] = browlengths[i-1]; 2504 } 2505 a->nz = 0; 2506 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2507 2508 /* read in nonzero values */ 2509 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2510 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2511 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2512 2513 /* set "a" and "j" values into matrix */ 2514 nzcount = 0; jcount = 0; 2515 for (i=0; i<mbs; i++) { 2516 nzcountb = nzcount; 2517 nmask = 0; 2518 for (j=0; j<bs; j++) { 2519 kmax = rowlengths[i*bs+j]; 2520 for (k=0; k<kmax; k++) { 2521 tmp = jj[nzcount++]/bs; 2522 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2523 } 2524 } 2525 /* sort the masked values */ 2526 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2527 2528 /* set "j" values into matrix */ 2529 maskcount = 1; 2530 for (j=0; j<nmask; j++) { 2531 a->j[jcount++] = masked[j]; 2532 mask[masked[j]] = maskcount++; 2533 } 2534 /* set "a" values into matrix */ 2535 ishift = bs2*a->i[i]; 2536 for (j=0; j<bs; j++) { 2537 kmax = rowlengths[i*bs+j]; 2538 for (k=0; k<kmax; k++) { 2539 tmp = jj[nzcountb]/bs ; 2540 block = mask[tmp] - 1; 2541 point = jj[nzcountb] - bs*tmp; 2542 idx = ishift + bs2*block + j + bs*point; 2543 a->a[idx] = (MatScalar)aa[nzcountb++]; 2544 } 2545 } 2546 /* zero out the mask elements we set */ 2547 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2548 } 2549 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2550 2551 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2552 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2553 ierr = PetscFree(aa);CHKERRQ(ierr); 2554 ierr = PetscFree(jj);CHKERRQ(ierr); 2555 ierr = PetscFree(mask);CHKERRQ(ierr); 2556 2557 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2558 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 ierr = MatView_Private(B);CHKERRQ(ierr); 2560 2561 *A = B; 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatCreateSeqBAIJ" 2567 /*@C 2568 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2569 compressed row) format. For good matrix assembly performance the 2570 user should preallocate the matrix storage by setting the parameter nz 2571 (or the array nnz). By setting these parameters accurately, performance 2572 during matrix assembly can be increased by more than a factor of 50. 2573 2574 Collective on MPI_Comm 2575 2576 Input Parameters: 2577 + comm - MPI communicator, set to PETSC_COMM_SELF 2578 . bs - size of block 2579 . m - number of rows 2580 . n - number of columns 2581 . nz - number of nonzero blocks per block row (same for all rows) 2582 - nnz - array containing the number of nonzero blocks in the various block rows 2583 (possibly different for each block row) or PETSC_NULL 2584 2585 Output Parameter: 2586 . A - the matrix 2587 2588 Options Database Keys: 2589 . -mat_no_unroll - uses code that does not unroll the loops in the 2590 block calculations (much slower) 2591 . -mat_block_size - size of the blocks to use 2592 2593 Level: intermediate 2594 2595 Notes: 2596 A nonzero block is any block that as 1 or more nonzeros in it 2597 2598 The block AIJ format is fully compatible with standard Fortran 77 2599 storage. That is, the stored row and column indices can begin at 2600 either one (as in Fortran) or zero. See the users' manual for details. 2601 2602 Specify the preallocated storage with either nz or nnz (not both). 2603 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2604 allocation. For additional details, see the users manual chapter on 2605 matrices. 2606 2607 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2608 @*/ 2609 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2610 { 2611 int ierr; 2612 2613 PetscFunctionBegin; 2614 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2615 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2616 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 #undef __FUNCT__ 2621 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2622 /*@C 2623 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2624 per row in the matrix. For good matrix assembly performance the 2625 user should preallocate the matrix storage by setting the parameter nz 2626 (or the array nnz). By setting these parameters accurately, performance 2627 during matrix assembly can be increased by more than a factor of 50. 2628 2629 Collective on MPI_Comm 2630 2631 Input Parameters: 2632 + A - the matrix 2633 . bs - size of block 2634 . nz - number of block nonzeros per block row (same for all rows) 2635 - nnz - array containing the number of block nonzeros in the various block rows 2636 (possibly different for each block row) or PETSC_NULL 2637 2638 Options Database Keys: 2639 . -mat_no_unroll - uses code that does not unroll the loops in the 2640 block calculations (much slower) 2641 . -mat_block_size - size of the blocks to use 2642 2643 Level: intermediate 2644 2645 Notes: 2646 The block AIJ format is fully compatible with standard Fortran 77 2647 storage. That is, the stored row and column indices can begin at 2648 either one (as in Fortran) or zero. See the users' manual for details. 2649 2650 Specify the preallocated storage with either nz or nnz (not both). 2651 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2652 allocation. For additional details, see the users manual chapter on 2653 matrices. 2654 2655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2656 @*/ 2657 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2658 { 2659 int ierr,(*f)(Mat,int,int,const int[]); 2660 2661 PetscFunctionBegin; 2662 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2663 if (f) { 2664 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2665 } 2666 PetscFunctionReturn(0); 2667 } 2668 2669