1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 int *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr; 17 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 18 19 PetscFunctionBegin; 20 if (a->idiagvalid) PetscFunctionReturn(0); 21 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 22 diag_offset = a->diag; 23 if (!a->idiag) { 24 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 25 } 26 diag = a->idiag; 27 mdiag = a->idiag+bs*bs*mbs; 28 /* factor and invert each block */ 29 switch (a->bs){ 30 case 2: 31 for (i=0; i<mbs; i++) { 32 odiag = v + 4*diag_offset[i]; 33 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 34 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 35 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 36 diag += 4; 37 mdiag += 4; 38 } 39 break; 40 case 3: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 9*diag_offset[i]; 43 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 44 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 45 diag[8] = odiag[8]; 46 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 47 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 48 mdiag[8] = odiag[8]; 49 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 50 diag += 9; 51 mdiag += 9; 52 } 53 break; 54 case 4: 55 for (i=0; i<mbs; i++) { 56 odiag = v + 16*diag_offset[i]; 57 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 58 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 59 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 60 diag += 16; 61 mdiag += 16; 62 } 63 break; 64 case 5: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 25*diag_offset[i]; 67 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 68 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 70 diag += 25; 71 mdiag += 25; 72 } 73 break; 74 default: 75 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %d",a->bs); 76 } 77 a->idiagvalid = PETSC_TRUE; 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 83 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 84 { 85 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 86 PetscScalar *x,x1,x2,s1,s2; 87 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 88 PetscErrorCode ierr; 89 int m = a->mbs,i,i2,nz,idx; 90 const int *diag,*ai = a->i,*aj = a->j,*vi; 91 92 PetscFunctionBegin; 93 its = its*lits; 94 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 95 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 96 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 97 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 98 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 99 100 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 101 102 diag = a->diag; 103 idiag = a->idiag; 104 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 105 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 106 107 if (flag & SOR_ZERO_INITIAL_GUESS) { 108 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 109 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 110 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 111 i2 = 2; 112 idiag += 4; 113 for (i=1; i<m; i++) { 114 v = aa + 4*ai[i]; 115 vi = aj + ai[i]; 116 nz = diag[i] - ai[i]; 117 s1 = b[i2]; s2 = b[i2+1]; 118 while (nz--) { 119 idx = 2*(*vi++); 120 x1 = x[idx]; x2 = x[1+idx]; 121 s1 -= v[0]*x1 + v[2]*x2; 122 s2 -= v[1]*x1 + v[3]*x2; 123 v += 4; 124 } 125 x[i2] = idiag[0]*s1 + idiag[2]*s2; 126 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 127 idiag += 4; 128 i2 += 2; 129 } 130 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 131 PetscLogFlops(4*(a->nz)); 132 } 133 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 134 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 135 i2 = 0; 136 mdiag = a->idiag+4*a->mbs; 137 for (i=0; i<m; i++) { 138 x1 = x[i2]; x2 = x[i2+1]; 139 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 140 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 141 mdiag += 4; 142 i2 += 2; 143 } 144 PetscLogFlops(6*m); 145 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 146 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 147 } 148 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 149 idiag = a->idiag+4*a->mbs - 4; 150 i2 = 2*m - 2; 151 x1 = x[i2]; x2 = x[i2+1]; 152 x[i2] = idiag[0]*x1 + idiag[2]*x2; 153 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 154 idiag -= 4; 155 i2 -= 2; 156 for (i=m-2; i>=0; i--) { 157 v = aa + 4*(diag[i]+1); 158 vi = aj + diag[i] + 1; 159 nz = ai[i+1] - diag[i] - 1; 160 s1 = x[i2]; s2 = x[i2+1]; 161 while (nz--) { 162 idx = 2*(*vi++); 163 x1 = x[idx]; x2 = x[1+idx]; 164 s1 -= v[0]*x1 + v[2]*x2; 165 s2 -= v[1]*x1 + v[3]*x2; 166 v += 4; 167 } 168 x[i2] = idiag[0]*s1 + idiag[2]*s2; 169 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 170 idiag -= 4; 171 i2 -= 2; 172 } 173 PetscLogFlops(4*(a->nz)); 174 } 175 } else { 176 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 177 } 178 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 179 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 180 PetscFunctionReturn(0); 181 } 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 185 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 186 { 187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 188 PetscScalar *x,x1,x2,x3,s1,s2,s3; 189 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 190 PetscErrorCode ierr; 191 int m = a->mbs,i,i2,nz,idx; 192 const int *diag,*ai = a->i,*aj = a->j,*vi; 193 194 PetscFunctionBegin; 195 its = its*lits; 196 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 197 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 198 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 199 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 200 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 201 202 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 203 204 diag = a->diag; 205 idiag = a->idiag; 206 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 207 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 208 209 if (flag & SOR_ZERO_INITIAL_GUESS) { 210 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 211 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 212 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 213 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 214 i2 = 3; 215 idiag += 9; 216 for (i=1; i<m; i++) { 217 v = aa + 9*ai[i]; 218 vi = aj + ai[i]; 219 nz = diag[i] - ai[i]; 220 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 221 while (nz--) { 222 idx = 3*(*vi++); 223 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 224 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 225 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 226 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 227 v += 9; 228 } 229 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 230 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 231 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 232 idiag += 9; 233 i2 += 3; 234 } 235 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 236 PetscLogFlops(9*(a->nz)); 237 } 238 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 239 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 240 i2 = 0; 241 mdiag = a->idiag+9*a->mbs; 242 for (i=0; i<m; i++) { 243 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 244 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 245 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 246 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 247 mdiag += 9; 248 i2 += 3; 249 } 250 PetscLogFlops(15*m); 251 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 252 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 255 idiag = a->idiag+9*a->mbs - 9; 256 i2 = 3*m - 3; 257 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 258 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 259 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 260 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 261 idiag -= 9; 262 i2 -= 3; 263 for (i=m-2; i>=0; i--) { 264 v = aa + 9*(diag[i]+1); 265 vi = aj + diag[i] + 1; 266 nz = ai[i+1] - diag[i] - 1; 267 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 268 while (nz--) { 269 idx = 3*(*vi++); 270 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 271 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 272 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 273 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 274 v += 9; 275 } 276 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 277 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 278 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 279 idiag -= 9; 280 i2 -= 3; 281 } 282 PetscLogFlops(9*(a->nz)); 283 } 284 } else { 285 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 286 } 287 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 288 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 294 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 295 { 296 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 297 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 298 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 299 PetscErrorCode ierr; 300 int m = a->mbs,i,i2,nz,idx; 301 const int *diag,*ai = a->i,*aj = a->j,*vi; 302 303 PetscFunctionBegin; 304 its = its*lits; 305 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 306 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 307 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 308 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 309 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 310 311 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 312 313 diag = a->diag; 314 idiag = a->idiag; 315 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 316 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 317 318 if (flag & SOR_ZERO_INITIAL_GUESS) { 319 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 320 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 321 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 322 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 323 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 324 i2 = 4; 325 idiag += 16; 326 for (i=1; i<m; i++) { 327 v = aa + 16*ai[i]; 328 vi = aj + ai[i]; 329 nz = diag[i] - ai[i]; 330 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 331 while (nz--) { 332 idx = 4*(*vi++); 333 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 334 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 335 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 336 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 337 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 338 v += 16; 339 } 340 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 341 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 342 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 343 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 344 idiag += 16; 345 i2 += 4; 346 } 347 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 348 PetscLogFlops(16*(a->nz)); 349 } 350 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 351 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 352 i2 = 0; 353 mdiag = a->idiag+16*a->mbs; 354 for (i=0; i<m; i++) { 355 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 356 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 357 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 358 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 359 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 360 mdiag += 16; 361 i2 += 4; 362 } 363 PetscLogFlops(28*m); 364 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 365 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 366 } 367 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 368 idiag = a->idiag+16*a->mbs - 16; 369 i2 = 4*m - 4; 370 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 371 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 372 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 373 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 374 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 375 idiag -= 16; 376 i2 -= 4; 377 for (i=m-2; i>=0; i--) { 378 v = aa + 16*(diag[i]+1); 379 vi = aj + diag[i] + 1; 380 nz = ai[i+1] - diag[i] - 1; 381 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 382 while (nz--) { 383 idx = 4*(*vi++); 384 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 385 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 386 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 387 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 388 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 389 v += 16; 390 } 391 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 392 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 393 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 394 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 395 idiag -= 16; 396 i2 -= 4; 397 } 398 PetscLogFlops(16*(a->nz)); 399 } 400 } else { 401 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 402 } 403 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 404 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 410 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 411 { 412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 413 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 414 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 415 PetscErrorCode ierr; 416 int m = a->mbs,i,i2,nz,idx; 417 const int *diag,*ai = a->i,*aj = a->j,*vi; 418 419 PetscFunctionBegin; 420 its = its*lits; 421 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 422 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 423 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 424 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 425 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 426 427 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 428 429 diag = a->diag; 430 idiag = a->idiag; 431 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 432 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 433 434 if (flag & SOR_ZERO_INITIAL_GUESS) { 435 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 436 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 437 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 438 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 439 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 440 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 441 i2 = 5; 442 idiag += 25; 443 for (i=1; i<m; i++) { 444 v = aa + 25*ai[i]; 445 vi = aj + ai[i]; 446 nz = diag[i] - ai[i]; 447 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 448 while (nz--) { 449 idx = 5*(*vi++); 450 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 451 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 452 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 453 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 454 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 455 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 456 v += 25; 457 } 458 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 459 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 460 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 461 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 462 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 463 idiag += 25; 464 i2 += 5; 465 } 466 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 467 PetscLogFlops(25*(a->nz)); 468 } 469 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 470 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 471 i2 = 0; 472 mdiag = a->idiag+25*a->mbs; 473 for (i=0; i<m; i++) { 474 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 475 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 476 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 477 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 478 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 479 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 480 mdiag += 25; 481 i2 += 5; 482 } 483 PetscLogFlops(45*m); 484 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 485 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 486 } 487 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 488 idiag = a->idiag+25*a->mbs - 25; 489 i2 = 5*m - 5; 490 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 491 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 492 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 493 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 494 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 495 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 496 idiag -= 25; 497 i2 -= 5; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 25*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 503 while (nz--) { 504 idx = 5*(*vi++); 505 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 506 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 507 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 508 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 509 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 510 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 511 v += 25; 512 } 513 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 514 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 515 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 516 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 517 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 518 idiag -= 25; 519 i2 -= 5; 520 } 521 PetscLogFlops(25*(a->nz)); 522 } 523 } else { 524 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 525 } 526 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 527 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 531 /* 532 Special version for Fun3d sequential benchmark 533 */ 534 #if defined(PETSC_HAVE_FORTRAN_CAPS) 535 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 536 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 537 #define matsetvaluesblocked4_ matsetvaluesblocked4 538 #endif 539 540 EXTERN_C_BEGIN 541 #undef __FUNCT__ 542 #define __FUNCT__ "matsetvaluesblocked4_" 543 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 544 { 545 Mat A = *AA; 546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 547 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 548 int *ai=a->i,*ailen=a->ilen; 549 int *aj=a->j,stepval; 550 const PetscScalar *value = v; 551 MatScalar *ap,*aa = a->a,*bap; 552 553 PetscFunctionBegin; 554 stepval = (n-1)*4; 555 for (k=0; k<m; k++) { /* loop over added rows */ 556 row = im[k]; 557 rp = aj + ai[row]; 558 ap = aa + 16*ai[row]; 559 nrow = ailen[row]; 560 low = 0; 561 for (l=0; l<n; l++) { /* loop over added columns */ 562 col = in[l]; 563 value = v + k*(stepval+4)*4 + l*4; 564 low = 0; high = nrow; 565 while (high-low > 7) { 566 t = (low+high)/2; 567 if (rp[t] > col) high = t; 568 else low = t; 569 } 570 for (i=low; i<high; i++) { 571 if (rp[i] > col) break; 572 if (rp[i] == col) { 573 bap = ap + 16*i; 574 for (ii=0; ii<4; ii++,value+=stepval) { 575 for (jj=ii; jj<16; jj+=4) { 576 bap[jj] += *value++; 577 } 578 } 579 goto noinsert2; 580 } 581 } 582 N = nrow++ - 1; 583 /* shift up all the later entries in this row */ 584 for (ii=N; ii>=i; ii--) { 585 rp[ii+1] = rp[ii]; 586 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 587 } 588 if (N >= i) { 589 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 590 } 591 rp[i] = col; 592 bap = ap + 16*i; 593 for (ii=0; ii<4; ii++,value+=stepval) { 594 for (jj=ii; jj<16; jj+=4) { 595 bap[jj] = *value++; 596 } 597 } 598 noinsert2:; 599 low = i; 600 } 601 ailen[row] = nrow; 602 } 603 } 604 EXTERN_C_END 605 606 #if defined(PETSC_HAVE_FORTRAN_CAPS) 607 #define matsetvalues4_ MATSETVALUES4 608 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 609 #define matsetvalues4_ matsetvalues4 610 #endif 611 612 EXTERN_C_BEGIN 613 #undef __FUNCT__ 614 #define __FUNCT__ "MatSetValues4_" 615 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 616 { 617 Mat A = *AA; 618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 619 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 620 int *ai=a->i,*ailen=a->ilen; 621 int *aj=a->j,brow,bcol; 622 int ridx,cidx; 623 MatScalar *ap,value,*aa=a->a,*bap; 624 625 PetscFunctionBegin; 626 for (k=0; k<m; k++) { /* loop over added rows */ 627 row = im[k]; brow = row/4; 628 rp = aj + ai[brow]; 629 ap = aa + 16*ai[brow]; 630 nrow = ailen[brow]; 631 low = 0; 632 for (l=0; l<n; l++) { /* loop over added columns */ 633 col = in[l]; bcol = col/4; 634 ridx = row % 4; cidx = col % 4; 635 value = v[l + k*n]; 636 low = 0; high = nrow; 637 while (high-low > 7) { 638 t = (low+high)/2; 639 if (rp[t] > bcol) high = t; 640 else low = t; 641 } 642 for (i=low; i<high; i++) { 643 if (rp[i] > bcol) break; 644 if (rp[i] == bcol) { 645 bap = ap + 16*i + 4*cidx + ridx; 646 *bap += value; 647 goto noinsert1; 648 } 649 } 650 N = nrow++ - 1; 651 /* shift up all the later entries in this row */ 652 for (ii=N; ii>=i; ii--) { 653 rp[ii+1] = rp[ii]; 654 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 655 } 656 if (N>=i) { 657 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 658 } 659 rp[i] = bcol; 660 ap[16*i + 4*cidx + ridx] = value; 661 noinsert1:; 662 low = i; 663 } 664 ailen[brow] = nrow; 665 } 666 } 667 EXTERN_C_END 668 669 /* UGLY, ugly, ugly 670 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 671 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 672 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 673 converts the entries into single precision and then calls ..._MatScalar() to put them 674 into the single precision data structures. 675 */ 676 #if defined(PETSC_USE_MAT_SINGLE) 677 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 678 #else 679 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 680 #endif 681 682 #define CHUNKSIZE 10 683 684 /* 685 Checks for missing diagonals 686 */ 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 689 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 690 { 691 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 692 int *diag,*jj = a->j,i,ierr; 693 694 PetscFunctionBegin; 695 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 696 diag = a->diag; 697 for (i=0; i<a->mbs; i++) { 698 if (jj[diag[i]] != i) { 699 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %d",i); 700 } 701 } 702 PetscFunctionReturn(0); 703 } 704 705 #undef __FUNCT__ 706 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 707 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 708 { 709 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 710 int i,j,*diag,m = a->mbs,ierr; 711 712 PetscFunctionBegin; 713 if (a->diag) PetscFunctionReturn(0); 714 715 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 716 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 717 for (i=0; i<m; i++) { 718 diag[i] = a->i[i+1]; 719 for (j=a->i[i]; j<a->i[i+1]; j++) { 720 if (a->j[j] == i) { 721 diag[i] = j; 722 break; 723 } 724 } 725 } 726 a->diag = diag; 727 PetscFunctionReturn(0); 728 } 729 730 731 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 732 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 735 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 736 { 737 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 738 PetscErrorCode ierr; 739 int n = a->mbs,i; 740 741 PetscFunctionBegin; 742 *nn = n; 743 if (!ia) PetscFunctionReturn(0); 744 if (symmetric) { 745 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 746 } else if (oshift == 1) { 747 /* temporarily add 1 to i and j indices */ 748 int nz = a->i[n]; 749 for (i=0; i<nz; i++) a->j[i]++; 750 for (i=0; i<n+1; i++) a->i[i]++; 751 *ia = a->i; *ja = a->j; 752 } else { 753 *ia = a->i; *ja = a->j; 754 } 755 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 761 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 762 { 763 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 764 int i,n = a->mbs,ierr; 765 766 PetscFunctionBegin; 767 if (!ia) PetscFunctionReturn(0); 768 if (symmetric) { 769 ierr = PetscFree(*ia);CHKERRQ(ierr); 770 ierr = PetscFree(*ja);CHKERRQ(ierr); 771 } else if (oshift == 1) { 772 int nz = a->i[n]-1; 773 for (i=0; i<nz; i++) a->j[i]--; 774 for (i=0; i<n+1; i++) a->i[i]--; 775 } 776 PetscFunctionReturn(0); 777 } 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 781 PetscErrorCode MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 782 { 783 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 784 785 PetscFunctionBegin; 786 *bs = baij->bs; 787 PetscFunctionReturn(0); 788 } 789 790 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatDestroy_SeqBAIJ" 793 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 794 { 795 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 796 PetscErrorCode ierr; 797 798 PetscFunctionBegin; 799 #if defined(PETSC_USE_LOG) 800 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 801 #endif 802 ierr = PetscFree(a->a);CHKERRQ(ierr); 803 if (!a->singlemalloc) { 804 ierr = PetscFree(a->i);CHKERRQ(ierr); 805 ierr = PetscFree(a->j);CHKERRQ(ierr); 806 } 807 if (a->row) { 808 ierr = ISDestroy(a->row);CHKERRQ(ierr); 809 } 810 if (a->col) { 811 ierr = ISDestroy(a->col);CHKERRQ(ierr); 812 } 813 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 814 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 815 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 816 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 817 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 818 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 819 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 820 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 821 #if defined(PETSC_USE_MAT_SINGLE) 822 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 823 #endif 824 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 825 826 ierr = PetscFree(a);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatSetOption_SeqBAIJ" 832 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 833 { 834 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 835 836 PetscFunctionBegin; 837 switch (op) { 838 case MAT_ROW_ORIENTED: 839 a->roworiented = PETSC_TRUE; 840 break; 841 case MAT_COLUMN_ORIENTED: 842 a->roworiented = PETSC_FALSE; 843 break; 844 case MAT_COLUMNS_SORTED: 845 a->sorted = PETSC_TRUE; 846 break; 847 case MAT_COLUMNS_UNSORTED: 848 a->sorted = PETSC_FALSE; 849 break; 850 case MAT_KEEP_ZEROED_ROWS: 851 a->keepzeroedrows = PETSC_TRUE; 852 break; 853 case MAT_NO_NEW_NONZERO_LOCATIONS: 854 a->nonew = 1; 855 break; 856 case MAT_NEW_NONZERO_LOCATION_ERR: 857 a->nonew = -1; 858 break; 859 case MAT_NEW_NONZERO_ALLOCATION_ERR: 860 a->nonew = -2; 861 break; 862 case MAT_YES_NEW_NONZERO_LOCATIONS: 863 a->nonew = 0; 864 break; 865 case MAT_ROWS_SORTED: 866 case MAT_ROWS_UNSORTED: 867 case MAT_YES_NEW_DIAGONALS: 868 case MAT_IGNORE_OFF_PROC_ENTRIES: 869 case MAT_USE_HASH_TABLE: 870 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 871 break; 872 case MAT_NO_NEW_DIAGONALS: 873 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 874 case MAT_SYMMETRIC: 875 case MAT_STRUCTURALLY_SYMMETRIC: 876 case MAT_NOT_SYMMETRIC: 877 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 878 case MAT_HERMITIAN: 879 case MAT_NOT_HERMITIAN: 880 case MAT_SYMMETRY_ETERNAL: 881 case MAT_NOT_SYMMETRY_ETERNAL: 882 break; 883 default: 884 SETERRQ(PETSC_ERR_SUP,"unknown option"); 885 } 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatGetRow_SeqBAIJ" 891 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 892 { 893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 894 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 895 MatScalar *aa,*aa_i; 896 PetscScalar *v_i; 897 898 PetscFunctionBegin; 899 bs = a->bs; 900 ai = a->i; 901 aj = a->j; 902 aa = a->a; 903 bs2 = a->bs2; 904 905 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 906 907 bn = row/bs; /* Block number */ 908 bp = row % bs; /* Block Position */ 909 M = ai[bn+1] - ai[bn]; 910 *nz = bs*M; 911 912 if (v) { 913 *v = 0; 914 if (*nz) { 915 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 916 for (i=0; i<M; i++) { /* for each block in the block row */ 917 v_i = *v + i*bs; 918 aa_i = aa + bs2*(ai[bn] + i); 919 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 920 } 921 } 922 } 923 924 if (idx) { 925 *idx = 0; 926 if (*nz) { 927 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 928 for (i=0; i<M; i++) { /* for each block in the block row */ 929 idx_i = *idx + i*bs; 930 itmp = bs*aj[ai[bn] + i]; 931 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 932 } 933 } 934 } 935 PetscFunctionReturn(0); 936 } 937 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 940 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 941 { 942 PetscErrorCode ierr; 943 944 PetscFunctionBegin; 945 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 946 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatTranspose_SeqBAIJ" 952 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 953 { 954 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 955 Mat C; 956 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 957 int *rows,*cols,bs2=a->bs2; 958 PetscScalar *array; 959 960 PetscFunctionBegin; 961 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 962 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 963 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 964 965 #if defined(PETSC_USE_MAT_SINGLE) 966 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 967 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 968 #else 969 array = a->a; 970 #endif 971 972 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 973 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 974 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 975 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 976 ierr = PetscFree(col);CHKERRQ(ierr); 977 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 978 cols = rows + bs; 979 for (i=0; i<mbs; i++) { 980 cols[0] = i*bs; 981 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 982 len = ai[i+1] - ai[i]; 983 for (j=0; j<len; j++) { 984 rows[0] = (*aj++)*bs; 985 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 986 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 987 array += bs2; 988 } 989 } 990 ierr = PetscFree(rows);CHKERRQ(ierr); 991 #if defined(PETSC_USE_MAT_SINGLE) 992 ierr = PetscFree(array); 993 #endif 994 995 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 996 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 997 998 if (B) { 999 *B = C; 1000 } else { 1001 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1002 } 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1008 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1009 { 1010 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1011 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1012 PetscScalar *aa; 1013 FILE *file; 1014 1015 PetscFunctionBegin; 1016 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1017 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 1018 col_lens[0] = MAT_FILE_COOKIE; 1019 1020 col_lens[1] = A->m; 1021 col_lens[2] = A->n; 1022 col_lens[3] = a->nz*bs2; 1023 1024 /* store lengths of each row and write (including header) to file */ 1025 count = 0; 1026 for (i=0; i<a->mbs; i++) { 1027 for (j=0; j<bs; j++) { 1028 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1029 } 1030 } 1031 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 1032 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1033 1034 /* store column indices (zero start index) */ 1035 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 1036 count = 0; 1037 for (i=0; i<a->mbs; i++) { 1038 for (j=0; j<bs; j++) { 1039 for (k=a->i[i]; k<a->i[i+1]; k++) { 1040 for (l=0; l<bs; l++) { 1041 jj[count++] = bs*a->j[k] + l; 1042 } 1043 } 1044 } 1045 } 1046 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 1047 ierr = PetscFree(jj);CHKERRQ(ierr); 1048 1049 /* store nonzero values */ 1050 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1051 count = 0; 1052 for (i=0; i<a->mbs; i++) { 1053 for (j=0; j<bs; j++) { 1054 for (k=a->i[i]; k<a->i[i+1]; k++) { 1055 for (l=0; l<bs; l++) { 1056 aa[count++] = a->a[bs2*k + l*bs + j]; 1057 } 1058 } 1059 } 1060 } 1061 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 1062 ierr = PetscFree(aa);CHKERRQ(ierr); 1063 1064 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1065 if (file) { 1066 fprintf(file,"-matload_block_size %d\n",a->bs); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1073 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1074 { 1075 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1076 PetscErrorCode ierr; 1077 int i,j,bs = a->bs,k,l,bs2=a->bs2; 1078 PetscViewerFormat format; 1079 1080 PetscFunctionBegin; 1081 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1082 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1083 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1084 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1085 Mat aij; 1086 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1087 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1088 ierr = MatDestroy(aij);CHKERRQ(ierr); 1089 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1090 PetscFunctionReturn(0); 1091 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1092 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1093 for (i=0; i<a->mbs; i++) { 1094 for (j=0; j<bs; j++) { 1095 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1096 for (k=a->i[i]; k<a->i[i+1]; k++) { 1097 for (l=0; l<bs; l++) { 1098 #if defined(PETSC_USE_COMPLEX) 1099 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1100 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 1101 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1102 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1103 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 1104 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1105 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1106 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1107 } 1108 #else 1109 if (a->a[bs2*k + l*bs + j] != 0.0) { 1110 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1111 } 1112 #endif 1113 } 1114 } 1115 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1116 } 1117 } 1118 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1119 } else { 1120 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1121 for (i=0; i<a->mbs; i++) { 1122 for (j=0; j<bs; j++) { 1123 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1124 for (k=a->i[i]; k<a->i[i+1]; k++) { 1125 for (l=0; l<bs; l++) { 1126 #if defined(PETSC_USE_COMPLEX) 1127 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1128 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 1129 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1130 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1131 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 1132 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1133 } else { 1134 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1135 } 1136 #else 1137 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1138 #endif 1139 } 1140 } 1141 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1142 } 1143 } 1144 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1145 } 1146 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1152 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1153 { 1154 Mat A = (Mat) Aa; 1155 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1156 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1157 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1158 MatScalar *aa; 1159 PetscViewer viewer; 1160 1161 PetscFunctionBegin; 1162 1163 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1164 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1165 1166 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1167 1168 /* loop over matrix elements drawing boxes */ 1169 color = PETSC_DRAW_BLUE; 1170 for (i=0,row=0; i<mbs; i++,row+=bs) { 1171 for (j=a->i[i]; j<a->i[i+1]; j++) { 1172 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1173 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1174 aa = a->a + j*bs2; 1175 for (k=0; k<bs; k++) { 1176 for (l=0; l<bs; l++) { 1177 if (PetscRealPart(*aa++) >= 0.) continue; 1178 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1179 } 1180 } 1181 } 1182 } 1183 color = PETSC_DRAW_CYAN; 1184 for (i=0,row=0; i<mbs; i++,row+=bs) { 1185 for (j=a->i[i]; j<a->i[i+1]; j++) { 1186 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1187 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1188 aa = a->a + j*bs2; 1189 for (k=0; k<bs; k++) { 1190 for (l=0; l<bs; l++) { 1191 if (PetscRealPart(*aa++) != 0.) continue; 1192 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1193 } 1194 } 1195 } 1196 } 1197 1198 color = PETSC_DRAW_RED; 1199 for (i=0,row=0; i<mbs; i++,row+=bs) { 1200 for (j=a->i[i]; j<a->i[i+1]; j++) { 1201 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1202 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1203 aa = a->a + j*bs2; 1204 for (k=0; k<bs; k++) { 1205 for (l=0; l<bs; l++) { 1206 if (PetscRealPart(*aa++) <= 0.) continue; 1207 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1208 } 1209 } 1210 } 1211 } 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1217 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1218 { 1219 PetscErrorCode ierr; 1220 PetscReal xl,yl,xr,yr,w,h; 1221 PetscDraw draw; 1222 PetscTruth isnull; 1223 1224 PetscFunctionBegin; 1225 1226 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1227 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1228 1229 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1230 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1231 xr += w; yr += h; xl = -w; yl = -h; 1232 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1233 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1234 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatView_SeqBAIJ" 1240 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1241 { 1242 PetscErrorCode ierr; 1243 PetscTruth iascii,isbinary,isdraw; 1244 1245 PetscFunctionBegin; 1246 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1247 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1248 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1249 if (iascii){ 1250 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1251 } else if (isbinary) { 1252 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1253 } else if (isdraw) { 1254 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1255 } else { 1256 Mat B; 1257 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1258 ierr = MatView(B,viewer);CHKERRQ(ierr); 1259 ierr = MatDestroy(B);CHKERRQ(ierr); 1260 } 1261 PetscFunctionReturn(0); 1262 } 1263 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1267 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 1268 { 1269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1270 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1271 int *ai = a->i,*ailen = a->ilen; 1272 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1273 MatScalar *ap,*aa = a->a,zero = 0.0; 1274 1275 PetscFunctionBegin; 1276 for (k=0; k<m; k++) { /* loop over rows */ 1277 row = im[k]; brow = row/bs; 1278 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1279 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 1280 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1281 nrow = ailen[brow]; 1282 for (l=0; l<n; l++) { /* loop over columns */ 1283 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1284 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 1285 col = in[l] ; 1286 bcol = col/bs; 1287 cidx = col%bs; 1288 ridx = row%bs; 1289 high = nrow; 1290 low = 0; /* assume unsorted */ 1291 while (high-low > 5) { 1292 t = (low+high)/2; 1293 if (rp[t] > bcol) high = t; 1294 else low = t; 1295 } 1296 for (i=low; i<high; i++) { 1297 if (rp[i] > bcol) break; 1298 if (rp[i] == bcol) { 1299 *v++ = ap[bs2*i+bs*cidx+ridx]; 1300 goto finished; 1301 } 1302 } 1303 *v++ = zero; 1304 finished:; 1305 } 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #if defined(PETSC_USE_MAT_SINGLE) 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1313 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 1314 { 1315 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1316 PetscErrorCode ierr; 1317 int i,N = m*n*b->bs2; 1318 MatScalar *vsingle; 1319 1320 PetscFunctionBegin; 1321 if (N > b->setvalueslen) { 1322 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1323 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1324 b->setvalueslen = N; 1325 } 1326 vsingle = b->setvaluescopy; 1327 for (i=0; i<N; i++) { 1328 vsingle[i] = v[i]; 1329 } 1330 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 #endif 1334 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1338 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 1339 { 1340 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1341 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1342 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1343 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 1344 PetscTruth roworiented=a->roworiented; 1345 const MatScalar *value = v; 1346 MatScalar *ap,*aa = a->a,*bap; 1347 1348 PetscFunctionBegin; 1349 if (roworiented) { 1350 stepval = (n-1)*bs; 1351 } else { 1352 stepval = (m-1)*bs; 1353 } 1354 for (k=0; k<m; k++) { /* loop over added rows */ 1355 row = im[k]; 1356 if (row < 0) continue; 1357 #if defined(PETSC_USE_BOPT_g) 1358 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 1359 #endif 1360 rp = aj + ai[row]; 1361 ap = aa + bs2*ai[row]; 1362 rmax = imax[row]; 1363 nrow = ailen[row]; 1364 low = 0; 1365 for (l=0; l<n; l++) { /* loop over added columns */ 1366 if (in[l] < 0) continue; 1367 #if defined(PETSC_USE_BOPT_g) 1368 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 1369 #endif 1370 col = in[l]; 1371 if (roworiented) { 1372 value = v + k*(stepval+bs)*bs + l*bs; 1373 } else { 1374 value = v + l*(stepval+bs)*bs + k*bs; 1375 } 1376 if (!sorted) low = 0; high = nrow; 1377 while (high-low > 7) { 1378 t = (low+high)/2; 1379 if (rp[t] > col) high = t; 1380 else low = t; 1381 } 1382 for (i=low; i<high; i++) { 1383 if (rp[i] > col) break; 1384 if (rp[i] == col) { 1385 bap = ap + bs2*i; 1386 if (roworiented) { 1387 if (is == ADD_VALUES) { 1388 for (ii=0; ii<bs; ii++,value+=stepval) { 1389 for (jj=ii; jj<bs2; jj+=bs) { 1390 bap[jj] += *value++; 1391 } 1392 } 1393 } else { 1394 for (ii=0; ii<bs; ii++,value+=stepval) { 1395 for (jj=ii; jj<bs2; jj+=bs) { 1396 bap[jj] = *value++; 1397 } 1398 } 1399 } 1400 } else { 1401 if (is == ADD_VALUES) { 1402 for (ii=0; ii<bs; ii++,value+=stepval) { 1403 for (jj=0; jj<bs; jj++) { 1404 *bap++ += *value++; 1405 } 1406 } 1407 } else { 1408 for (ii=0; ii<bs; ii++,value+=stepval) { 1409 for (jj=0; jj<bs; jj++) { 1410 *bap++ = *value++; 1411 } 1412 } 1413 } 1414 } 1415 goto noinsert2; 1416 } 1417 } 1418 if (nonew == 1) goto noinsert2; 1419 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1420 if (nrow >= rmax) { 1421 /* there is no extra room in row, therefore enlarge */ 1422 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1423 MatScalar *new_a; 1424 1425 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1426 1427 /* malloc new storage space */ 1428 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1429 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1430 new_j = (int*)(new_a + bs2*new_nz); 1431 new_i = new_j + new_nz; 1432 1433 /* copy over old data into new slots */ 1434 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1435 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1436 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 1437 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1438 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 1439 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1440 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1441 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1442 /* free up old matrix storage */ 1443 ierr = PetscFree(a->a);CHKERRQ(ierr); 1444 if (!a->singlemalloc) { 1445 ierr = PetscFree(a->i);CHKERRQ(ierr); 1446 ierr = PetscFree(a->j);CHKERRQ(ierr); 1447 } 1448 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1449 a->singlemalloc = PETSC_TRUE; 1450 1451 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1452 rmax = imax[row] = imax[row] + CHUNKSIZE; 1453 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1454 a->maxnz += bs2*CHUNKSIZE; 1455 a->reallocs++; 1456 a->nz++; 1457 } 1458 N = nrow++ - 1; 1459 /* shift up all the later entries in this row */ 1460 for (ii=N; ii>=i; ii--) { 1461 rp[ii+1] = rp[ii]; 1462 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1463 } 1464 if (N >= i) { 1465 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1466 } 1467 rp[i] = col; 1468 bap = ap + bs2*i; 1469 if (roworiented) { 1470 for (ii=0; ii<bs; ii++,value+=stepval) { 1471 for (jj=ii; jj<bs2; jj+=bs) { 1472 bap[jj] = *value++; 1473 } 1474 } 1475 } else { 1476 for (ii=0; ii<bs; ii++,value+=stepval) { 1477 for (jj=0; jj<bs; jj++) { 1478 *bap++ = *value++; 1479 } 1480 } 1481 } 1482 noinsert2:; 1483 low = i; 1484 } 1485 ailen[row] = nrow; 1486 } 1487 PetscFunctionReturn(0); 1488 } 1489 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1492 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1493 { 1494 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1495 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1496 int m = A->m,*ip,N,*ailen = a->ilen; 1497 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 1498 MatScalar *aa = a->a,*ap; 1499 1500 PetscFunctionBegin; 1501 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1502 1503 if (m) rmax = ailen[0]; 1504 for (i=1; i<mbs; i++) { 1505 /* move each row back by the amount of empty slots (fshift) before it*/ 1506 fshift += imax[i-1] - ailen[i-1]; 1507 rmax = PetscMax(rmax,ailen[i]); 1508 if (fshift) { 1509 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1510 N = ailen[i]; 1511 for (j=0; j<N; j++) { 1512 ip[j-fshift] = ip[j]; 1513 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1514 } 1515 } 1516 ai[i] = ai[i-1] + ailen[i-1]; 1517 } 1518 if (mbs) { 1519 fshift += imax[mbs-1] - ailen[mbs-1]; 1520 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1521 } 1522 /* reset ilen and imax for each row */ 1523 for (i=0; i<mbs; i++) { 1524 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1525 } 1526 a->nz = ai[mbs]; 1527 1528 /* diagonals may have moved, so kill the diagonal pointers */ 1529 a->idiagvalid = PETSC_FALSE; 1530 if (fshift && a->diag) { 1531 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1532 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1533 a->diag = 0; 1534 } 1535 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1536 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1537 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1538 a->reallocs = 0; 1539 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1540 1541 PetscFunctionReturn(0); 1542 } 1543 1544 1545 1546 /* 1547 This function returns an array of flags which indicate the locations of contiguous 1548 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1549 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1550 Assume: sizes should be long enough to hold all the values. 1551 */ 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1554 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1555 { 1556 int i,j,k,row; 1557 PetscTruth flg; 1558 1559 PetscFunctionBegin; 1560 for (i=0,j=0; i<n; j++) { 1561 row = idx[i]; 1562 if (row%bs!=0) { /* Not the begining of a block */ 1563 sizes[j] = 1; 1564 i++; 1565 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1566 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1567 i++; 1568 } else { /* Begining of the block, so check if the complete block exists */ 1569 flg = PETSC_TRUE; 1570 for (k=1; k<bs; k++) { 1571 if (row+k != idx[i+k]) { /* break in the block */ 1572 flg = PETSC_FALSE; 1573 break; 1574 } 1575 } 1576 if (flg == PETSC_TRUE) { /* No break in the bs */ 1577 sizes[j] = bs; 1578 i+= bs; 1579 } else { 1580 sizes[j] = 1; 1581 i++; 1582 } 1583 } 1584 } 1585 *bs_max = j; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1591 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1592 { 1593 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 int i,j,k,count,is_n,*is_idx,*rows; 1596 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1597 PetscScalar zero = 0.0; 1598 MatScalar *aa; 1599 1600 PetscFunctionBegin; 1601 /* Make a copy of the IS and sort it */ 1602 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1603 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1604 1605 /* allocate memory for rows,sizes */ 1606 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1607 sizes = rows + is_n; 1608 1609 /* copy IS values to rows, and sort them */ 1610 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1611 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1612 if (baij->keepzeroedrows) { 1613 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1614 bs_max = is_n; 1615 } else { 1616 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1617 } 1618 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1619 1620 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1621 row = rows[j]; 1622 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1623 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1624 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1625 if (sizes[i] == bs && !baij->keepzeroedrows) { 1626 if (diag) { 1627 if (baij->ilen[row/bs] > 0) { 1628 baij->ilen[row/bs] = 1; 1629 baij->j[baij->i[row/bs]] = row/bs; 1630 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1631 } 1632 /* Now insert all the diagonal values for this bs */ 1633 for (k=0; k<bs; k++) { 1634 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1635 } 1636 } else { /* (!diag) */ 1637 baij->ilen[row/bs] = 0; 1638 } /* end (!diag) */ 1639 } else { /* (sizes[i] != bs) */ 1640 #if defined (PETSC_USE_DEBUG) 1641 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1642 #endif 1643 for (k=0; k<count; k++) { 1644 aa[0] = zero; 1645 aa += bs; 1646 } 1647 if (diag) { 1648 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1649 } 1650 } 1651 } 1652 1653 ierr = PetscFree(rows);CHKERRQ(ierr); 1654 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1655 PetscFunctionReturn(0); 1656 } 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1660 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1661 { 1662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1663 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1664 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1665 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1666 int ridx,cidx,bs2=a->bs2,ierr; 1667 PetscTruth roworiented=a->roworiented; 1668 MatScalar *ap,value,*aa=a->a,*bap; 1669 1670 PetscFunctionBegin; 1671 for (k=0; k<m; k++) { /* loop over added rows */ 1672 row = im[k]; brow = row/bs; 1673 if (row < 0) continue; 1674 #if defined(PETSC_USE_BOPT_g) 1675 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1676 #endif 1677 rp = aj + ai[brow]; 1678 ap = aa + bs2*ai[brow]; 1679 rmax = imax[brow]; 1680 nrow = ailen[brow]; 1681 low = 0; 1682 for (l=0; l<n; l++) { /* loop over added columns */ 1683 if (in[l] < 0) continue; 1684 #if defined(PETSC_USE_BOPT_g) 1685 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1686 #endif 1687 col = in[l]; bcol = col/bs; 1688 ridx = row % bs; cidx = col % bs; 1689 if (roworiented) { 1690 value = v[l + k*n]; 1691 } else { 1692 value = v[k + l*m]; 1693 } 1694 if (!sorted) low = 0; high = nrow; 1695 while (high-low > 7) { 1696 t = (low+high)/2; 1697 if (rp[t] > bcol) high = t; 1698 else low = t; 1699 } 1700 for (i=low; i<high; i++) { 1701 if (rp[i] > bcol) break; 1702 if (rp[i] == bcol) { 1703 bap = ap + bs2*i + bs*cidx + ridx; 1704 if (is == ADD_VALUES) *bap += value; 1705 else *bap = value; 1706 goto noinsert1; 1707 } 1708 } 1709 if (nonew == 1) goto noinsert1; 1710 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1711 if (nrow >= rmax) { 1712 /* there is no extra room in row, therefore enlarge */ 1713 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1714 MatScalar *new_a; 1715 1716 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1717 1718 /* Malloc new storage space */ 1719 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1720 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1721 new_j = (int*)(new_a + bs2*new_nz); 1722 new_i = new_j + new_nz; 1723 1724 /* copy over old data into new slots */ 1725 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1726 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1727 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1728 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1729 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1730 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1731 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1732 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1733 /* free up old matrix storage */ 1734 ierr = PetscFree(a->a);CHKERRQ(ierr); 1735 if (!a->singlemalloc) { 1736 ierr = PetscFree(a->i);CHKERRQ(ierr); 1737 ierr = PetscFree(a->j);CHKERRQ(ierr); 1738 } 1739 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1740 a->singlemalloc = PETSC_TRUE; 1741 1742 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1743 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1744 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1745 a->maxnz += bs2*CHUNKSIZE; 1746 a->reallocs++; 1747 a->nz++; 1748 } 1749 N = nrow++ - 1; 1750 /* shift up all the later entries in this row */ 1751 for (ii=N; ii>=i; ii--) { 1752 rp[ii+1] = rp[ii]; 1753 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1754 } 1755 if (N>=i) { 1756 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1757 } 1758 rp[i] = bcol; 1759 ap[bs2*i + bs*cidx + ridx] = value; 1760 noinsert1:; 1761 low = i; 1762 } 1763 ailen[brow] = nrow; 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1771 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1772 { 1773 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1774 Mat outA; 1775 PetscErrorCode ierr; 1776 PetscTruth row_identity,col_identity; 1777 1778 PetscFunctionBegin; 1779 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1780 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1781 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1782 if (!row_identity || !col_identity) { 1783 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1784 } 1785 1786 outA = inA; 1787 inA->factor = FACTOR_LU; 1788 1789 if (!a->diag) { 1790 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1791 } 1792 1793 a->row = row; 1794 a->col = col; 1795 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1796 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1797 1798 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1799 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1800 PetscLogObjectParent(inA,a->icol); 1801 1802 /* 1803 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1804 for ILU(0) factorization with natural ordering 1805 */ 1806 if (a->bs < 8) { 1807 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1808 } else { 1809 if (!a->solve_work) { 1810 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1811 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1812 } 1813 } 1814 1815 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1816 1817 PetscFunctionReturn(0); 1818 } 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1821 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1822 { 1823 static PetscTruth called = PETSC_FALSE; 1824 MPI_Comm comm = A->comm; 1825 PetscErrorCode ierr; 1826 1827 PetscFunctionBegin; 1828 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1829 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1830 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 EXTERN_C_BEGIN 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1837 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1838 { 1839 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1840 int i,nz,nbs; 1841 1842 PetscFunctionBegin; 1843 nz = baij->maxnz/baij->bs2; 1844 nbs = baij->nbs; 1845 for (i=0; i<nz; i++) { 1846 baij->j[i] = indices[i]; 1847 } 1848 baij->nz = nz; 1849 for (i=0; i<nbs; i++) { 1850 baij->ilen[i] = baij->imax[i]; 1851 } 1852 1853 PetscFunctionReturn(0); 1854 } 1855 EXTERN_C_END 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1859 /*@ 1860 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1861 in the matrix. 1862 1863 Input Parameters: 1864 + mat - the SeqBAIJ matrix 1865 - indices - the column indices 1866 1867 Level: advanced 1868 1869 Notes: 1870 This can be called if you have precomputed the nonzero structure of the 1871 matrix and want to provide it to the matrix object to improve the performance 1872 of the MatSetValues() operation. 1873 1874 You MUST have set the correct numbers of nonzeros per row in the call to 1875 MatCreateSeqBAIJ(). 1876 1877 MUST be called before any calls to MatSetValues(); 1878 1879 @*/ 1880 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1881 { 1882 PetscErrorCode ierr,(*f)(Mat,int *); 1883 1884 PetscFunctionBegin; 1885 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1886 PetscValidPointer(indices,2); 1887 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1888 if (f) { 1889 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1890 } else { 1891 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1898 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1899 { 1900 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1901 PetscErrorCode ierr; 1902 int i,j,n,row,bs,*ai,*aj,mbs; 1903 PetscReal atmp; 1904 PetscScalar *x,zero = 0.0; 1905 MatScalar *aa; 1906 int ncols,brow,krow,kcol; 1907 1908 PetscFunctionBegin; 1909 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1910 bs = a->bs; 1911 aa = a->a; 1912 ai = a->i; 1913 aj = a->j; 1914 mbs = a->mbs; 1915 1916 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1917 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1918 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1919 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1920 for (i=0; i<mbs; i++) { 1921 ncols = ai[1] - ai[0]; ai++; 1922 brow = bs*i; 1923 for (j=0; j<ncols; j++){ 1924 /* bcol = bs*(*aj); */ 1925 for (kcol=0; kcol<bs; kcol++){ 1926 for (krow=0; krow<bs; krow++){ 1927 atmp = PetscAbsScalar(*aa); aa++; 1928 row = brow + krow; /* row index */ 1929 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1930 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1931 } 1932 } 1933 aj++; 1934 } 1935 } 1936 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1937 PetscFunctionReturn(0); 1938 } 1939 1940 #undef __FUNCT__ 1941 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1942 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1943 { 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1953 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1954 { 1955 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1956 PetscFunctionBegin; 1957 *array = a->a; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1963 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1964 { 1965 PetscFunctionBegin; 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #include "petscblaslapack.h" 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1972 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1973 { 1974 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1975 PetscErrorCode ierr; 1976 int one=1,i,bs=y->bs,j,bs2; 1977 1978 PetscFunctionBegin; 1979 if (str == SAME_NONZERO_PATTERN) { 1980 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1981 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1982 if (y->xtoy && y->XtoY != X) { 1983 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1984 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1985 } 1986 if (!y->xtoy) { /* get xtoy */ 1987 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1988 y->XtoY = X; 1989 } 1990 bs2 = bs*bs; 1991 for (i=0; i<x->nz; i++) { 1992 j = 0; 1993 while (j < bs2){ 1994 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1995 j++; 1996 } 1997 } 1998 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1999 } else { 2000 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2001 } 2002 PetscFunctionReturn(0); 2003 } 2004 2005 /* -------------------------------------------------------------------*/ 2006 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2007 MatGetRow_SeqBAIJ, 2008 MatRestoreRow_SeqBAIJ, 2009 MatMult_SeqBAIJ_N, 2010 /* 4*/ MatMultAdd_SeqBAIJ_N, 2011 MatMultTranspose_SeqBAIJ, 2012 MatMultTransposeAdd_SeqBAIJ, 2013 MatSolve_SeqBAIJ_N, 2014 0, 2015 0, 2016 /*10*/ 0, 2017 MatLUFactor_SeqBAIJ, 2018 0, 2019 0, 2020 MatTranspose_SeqBAIJ, 2021 /*15*/ MatGetInfo_SeqBAIJ, 2022 MatEqual_SeqBAIJ, 2023 MatGetDiagonal_SeqBAIJ, 2024 MatDiagonalScale_SeqBAIJ, 2025 MatNorm_SeqBAIJ, 2026 /*20*/ 0, 2027 MatAssemblyEnd_SeqBAIJ, 2028 0, 2029 MatSetOption_SeqBAIJ, 2030 MatZeroEntries_SeqBAIJ, 2031 /*25*/ MatZeroRows_SeqBAIJ, 2032 MatLUFactorSymbolic_SeqBAIJ, 2033 MatLUFactorNumeric_SeqBAIJ_N, 2034 0, 2035 0, 2036 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2037 MatILUFactorSymbolic_SeqBAIJ, 2038 0, 2039 MatGetArray_SeqBAIJ, 2040 MatRestoreArray_SeqBAIJ, 2041 /*35*/ MatDuplicate_SeqBAIJ, 2042 0, 2043 0, 2044 MatILUFactor_SeqBAIJ, 2045 0, 2046 /*40*/ MatAXPY_SeqBAIJ, 2047 MatGetSubMatrices_SeqBAIJ, 2048 MatIncreaseOverlap_SeqBAIJ, 2049 MatGetValues_SeqBAIJ, 2050 0, 2051 /*45*/ MatPrintHelp_SeqBAIJ, 2052 MatScale_SeqBAIJ, 2053 0, 2054 0, 2055 0, 2056 /*50*/ MatGetBlockSize_SeqBAIJ, 2057 MatGetRowIJ_SeqBAIJ, 2058 MatRestoreRowIJ_SeqBAIJ, 2059 0, 2060 0, 2061 /*55*/ 0, 2062 0, 2063 0, 2064 0, 2065 MatSetValuesBlocked_SeqBAIJ, 2066 /*60*/ MatGetSubMatrix_SeqBAIJ, 2067 MatDestroy_SeqBAIJ, 2068 MatView_SeqBAIJ, 2069 MatGetPetscMaps_Petsc, 2070 0, 2071 /*65*/ 0, 2072 0, 2073 0, 2074 0, 2075 0, 2076 /*70*/ MatGetRowMax_SeqBAIJ, 2077 MatConvert_Basic, 2078 0, 2079 0, 2080 0, 2081 /*75*/ 0, 2082 0, 2083 0, 2084 0, 2085 0, 2086 /*80*/ 0, 2087 0, 2088 0, 2089 0, 2090 /*85*/ MatLoad_SeqBAIJ, 2091 0, 2092 0, 2093 0, 2094 0 2095 }; 2096 2097 EXTERN_C_BEGIN 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2100 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2101 { 2102 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2103 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 if (aij->nonew != 1) { 2108 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2109 } 2110 2111 /* allocate space for values if not already there */ 2112 if (!aij->saved_values) { 2113 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2114 } 2115 2116 /* copy values over */ 2117 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 EXTERN_C_END 2121 2122 EXTERN_C_BEGIN 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2125 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2126 { 2127 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2128 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 2129 2130 PetscFunctionBegin; 2131 if (aij->nonew != 1) { 2132 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2133 } 2134 if (!aij->saved_values) { 2135 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2136 } 2137 2138 /* copy values over */ 2139 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 EXTERN_C_END 2143 2144 EXTERN_C_BEGIN 2145 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2146 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2147 EXTERN_C_END 2148 2149 EXTERN_C_BEGIN 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2152 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 2153 { 2154 Mat_SeqBAIJ *b; 2155 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 2156 PetscTruth flg; 2157 2158 PetscFunctionBegin; 2159 2160 B->preallocated = PETSC_TRUE; 2161 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2162 if (nnz && newbs != bs) { 2163 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2164 } 2165 bs = newbs; 2166 2167 mbs = B->m/bs; 2168 nbs = B->n/bs; 2169 bs2 = bs*bs; 2170 2171 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2172 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2173 } 2174 2175 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2176 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2177 if (nnz) { 2178 for (i=0; i<mbs; i++) { 2179 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2180 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2181 } 2182 } 2183 2184 b = (Mat_SeqBAIJ*)B->data; 2185 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2186 B->ops->solve = MatSolve_SeqBAIJ_Update; 2187 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2188 if (!flg) { 2189 switch (bs) { 2190 case 1: 2191 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2192 B->ops->mult = MatMult_SeqBAIJ_1; 2193 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2194 break; 2195 case 2: 2196 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2197 B->ops->mult = MatMult_SeqBAIJ_2; 2198 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2199 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2200 break; 2201 case 3: 2202 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2203 B->ops->mult = MatMult_SeqBAIJ_3; 2204 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2205 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2206 break; 2207 case 4: 2208 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2209 B->ops->mult = MatMult_SeqBAIJ_4; 2210 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2211 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2212 break; 2213 case 5: 2214 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2215 B->ops->mult = MatMult_SeqBAIJ_5; 2216 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2217 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2218 break; 2219 case 6: 2220 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2221 B->ops->mult = MatMult_SeqBAIJ_6; 2222 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2223 break; 2224 case 7: 2225 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2226 B->ops->mult = MatMult_SeqBAIJ_7; 2227 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2228 break; 2229 default: 2230 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2231 B->ops->mult = MatMult_SeqBAIJ_N; 2232 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2233 break; 2234 } 2235 } 2236 b->bs = bs; 2237 b->mbs = mbs; 2238 b->nbs = nbs; 2239 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2240 if (!nnz) { 2241 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2242 else if (nz <= 0) nz = 1; 2243 for (i=0; i<mbs; i++) b->imax[i] = nz; 2244 nz = nz*mbs; 2245 } else { 2246 nz = 0; 2247 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2248 } 2249 2250 /* allocate the matrix space */ 2251 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2252 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2253 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2254 b->j = (int*)(b->a + nz*bs2); 2255 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2256 b->i = b->j + nz; 2257 b->singlemalloc = PETSC_TRUE; 2258 2259 b->i[0] = 0; 2260 for (i=1; i<mbs+1; i++) { 2261 b->i[i] = b->i[i-1] + b->imax[i-1]; 2262 } 2263 2264 /* b->ilen will count nonzeros in each block row so far. */ 2265 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2266 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2267 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2268 2269 b->bs = bs; 2270 b->bs2 = bs2; 2271 b->mbs = mbs; 2272 b->nz = 0; 2273 b->maxnz = nz*bs2; 2274 B->info.nz_unneeded = (PetscReal)b->maxnz; 2275 PetscFunctionReturn(0); 2276 } 2277 EXTERN_C_END 2278 2279 /*MC 2280 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2281 block sparse compressed row format. 2282 2283 Options Database Keys: 2284 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2285 2286 Level: beginner 2287 2288 .seealso: MatCreateSeqBAIJ 2289 M*/ 2290 2291 EXTERN_C_BEGIN 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatCreate_SeqBAIJ" 2294 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2295 { 2296 PetscErrorCode ierr; 2297 int size; 2298 Mat_SeqBAIJ *b; 2299 2300 PetscFunctionBegin; 2301 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2302 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2303 2304 B->m = B->M = PetscMax(B->m,B->M); 2305 B->n = B->N = PetscMax(B->n,B->N); 2306 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2307 B->data = (void*)b; 2308 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2309 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2310 B->factor = 0; 2311 B->lupivotthreshold = 1.0; 2312 B->mapping = 0; 2313 b->row = 0; 2314 b->col = 0; 2315 b->icol = 0; 2316 b->reallocs = 0; 2317 b->saved_values = 0; 2318 #if defined(PETSC_USE_MAT_SINGLE) 2319 b->setvalueslen = 0; 2320 b->setvaluescopy = PETSC_NULL; 2321 #endif 2322 2323 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2324 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2325 2326 b->sorted = PETSC_FALSE; 2327 b->roworiented = PETSC_TRUE; 2328 b->nonew = 0; 2329 b->diag = 0; 2330 b->solve_work = 0; 2331 b->mult_work = 0; 2332 B->spptr = 0; 2333 B->info.nz_unneeded = (PetscReal)b->maxnz; 2334 b->keepzeroedrows = PETSC_FALSE; 2335 b->xtoy = 0; 2336 b->XtoY = 0; 2337 2338 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2339 "MatStoreValues_SeqBAIJ", 2340 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2342 "MatRetrieveValues_SeqBAIJ", 2343 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2344 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2345 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2346 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2347 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2348 "MatConvert_SeqBAIJ_SeqAIJ", 2349 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2351 "MatConvert_SeqBAIJ_SeqSBAIJ", 2352 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2353 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2354 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2355 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2356 PetscFunctionReturn(0); 2357 } 2358 EXTERN_C_END 2359 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2362 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2363 { 2364 Mat C; 2365 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2366 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 2367 2368 PetscFunctionBegin; 2369 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2370 2371 *B = 0; 2372 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2373 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2374 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2375 c = (Mat_SeqBAIJ*)C->data; 2376 2377 C->M = A->M; 2378 C->N = A->N; 2379 c->bs = a->bs; 2380 c->bs2 = a->bs2; 2381 c->mbs = a->mbs; 2382 c->nbs = a->nbs; 2383 2384 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 2385 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 2386 for (i=0; i<mbs; i++) { 2387 c->imax[i] = a->imax[i]; 2388 c->ilen[i] = a->ilen[i]; 2389 } 2390 2391 /* allocate the matrix space */ 2392 c->singlemalloc = PETSC_TRUE; 2393 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 2394 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2395 c->j = (int*)(c->a + nz*bs2); 2396 c->i = c->j + nz; 2397 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 2398 if (mbs > 0) { 2399 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 2400 if (cpvalues == MAT_COPY_VALUES) { 2401 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2402 } else { 2403 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2404 } 2405 } 2406 2407 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2408 c->sorted = a->sorted; 2409 c->roworiented = a->roworiented; 2410 c->nonew = a->nonew; 2411 2412 if (a->diag) { 2413 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 2414 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 2415 for (i=0; i<mbs; i++) { 2416 c->diag[i] = a->diag[i]; 2417 } 2418 } else c->diag = 0; 2419 c->nz = a->nz; 2420 c->maxnz = a->maxnz; 2421 c->solve_work = 0; 2422 c->mult_work = 0; 2423 C->preallocated = PETSC_TRUE; 2424 C->assembled = PETSC_TRUE; 2425 *B = C; 2426 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatLoad_SeqBAIJ" 2432 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2433 { 2434 Mat_SeqBAIJ *a; 2435 Mat B; 2436 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 2437 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2438 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2439 int *masked,nmask,tmp,bs2,ishift; 2440 PetscScalar *aa; 2441 MPI_Comm comm = ((PetscObject)viewer)->comm; 2442 2443 PetscFunctionBegin; 2444 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2445 bs2 = bs*bs; 2446 2447 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2448 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2449 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2450 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2451 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2452 M = header[1]; N = header[2]; nz = header[3]; 2453 2454 if (header[3] < 0) { 2455 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2456 } 2457 2458 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2459 2460 /* 2461 This code adds extra rows to make sure the number of rows is 2462 divisible by the blocksize 2463 */ 2464 mbs = M/bs; 2465 extra_rows = bs - M + bs*(mbs); 2466 if (extra_rows == bs) extra_rows = 0; 2467 else mbs++; 2468 if (extra_rows) { 2469 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2470 } 2471 2472 /* read in row lengths */ 2473 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2474 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2475 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2476 2477 /* read in column indices */ 2478 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 2479 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2480 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2481 2482 /* loop over row lengths determining block row lengths */ 2483 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 2484 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 2485 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 2486 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 2487 masked = mask + mbs; 2488 rowcount = 0; nzcount = 0; 2489 for (i=0; i<mbs; i++) { 2490 nmask = 0; 2491 for (j=0; j<bs; j++) { 2492 kmax = rowlengths[rowcount]; 2493 for (k=0; k<kmax; k++) { 2494 tmp = jj[nzcount++]/bs; 2495 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2496 } 2497 rowcount++; 2498 } 2499 browlengths[i] += nmask; 2500 /* zero out the mask elements we set */ 2501 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2502 } 2503 2504 /* create our matrix */ 2505 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2506 ierr = MatSetType(B,type);CHKERRQ(ierr); 2507 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2508 a = (Mat_SeqBAIJ*)B->data; 2509 2510 /* set matrix "i" values */ 2511 a->i[0] = 0; 2512 for (i=1; i<= mbs; i++) { 2513 a->i[i] = a->i[i-1] + browlengths[i-1]; 2514 a->ilen[i-1] = browlengths[i-1]; 2515 } 2516 a->nz = 0; 2517 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2518 2519 /* read in nonzero values */ 2520 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2521 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2522 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2523 2524 /* set "a" and "j" values into matrix */ 2525 nzcount = 0; jcount = 0; 2526 for (i=0; i<mbs; i++) { 2527 nzcountb = nzcount; 2528 nmask = 0; 2529 for (j=0; j<bs; j++) { 2530 kmax = rowlengths[i*bs+j]; 2531 for (k=0; k<kmax; k++) { 2532 tmp = jj[nzcount++]/bs; 2533 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2534 } 2535 } 2536 /* sort the masked values */ 2537 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2538 2539 /* set "j" values into matrix */ 2540 maskcount = 1; 2541 for (j=0; j<nmask; j++) { 2542 a->j[jcount++] = masked[j]; 2543 mask[masked[j]] = maskcount++; 2544 } 2545 /* set "a" values into matrix */ 2546 ishift = bs2*a->i[i]; 2547 for (j=0; j<bs; j++) { 2548 kmax = rowlengths[i*bs+j]; 2549 for (k=0; k<kmax; k++) { 2550 tmp = jj[nzcountb]/bs ; 2551 block = mask[tmp] - 1; 2552 point = jj[nzcountb] - bs*tmp; 2553 idx = ishift + bs2*block + j + bs*point; 2554 a->a[idx] = (MatScalar)aa[nzcountb++]; 2555 } 2556 } 2557 /* zero out the mask elements we set */ 2558 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2559 } 2560 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2561 2562 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2563 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2564 ierr = PetscFree(aa);CHKERRQ(ierr); 2565 ierr = PetscFree(jj);CHKERRQ(ierr); 2566 ierr = PetscFree(mask);CHKERRQ(ierr); 2567 2568 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2569 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2570 ierr = MatView_Private(B);CHKERRQ(ierr); 2571 2572 *A = B; 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #undef __FUNCT__ 2577 #define __FUNCT__ "MatCreateSeqBAIJ" 2578 /*@C 2579 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2580 compressed row) format. For good matrix assembly performance the 2581 user should preallocate the matrix storage by setting the parameter nz 2582 (or the array nnz). By setting these parameters accurately, performance 2583 during matrix assembly can be increased by more than a factor of 50. 2584 2585 Collective on MPI_Comm 2586 2587 Input Parameters: 2588 + comm - MPI communicator, set to PETSC_COMM_SELF 2589 . bs - size of block 2590 . m - number of rows 2591 . n - number of columns 2592 . nz - number of nonzero blocks per block row (same for all rows) 2593 - nnz - array containing the number of nonzero blocks in the various block rows 2594 (possibly different for each block row) or PETSC_NULL 2595 2596 Output Parameter: 2597 . A - the matrix 2598 2599 Options Database Keys: 2600 . -mat_no_unroll - uses code that does not unroll the loops in the 2601 block calculations (much slower) 2602 . -mat_block_size - size of the blocks to use 2603 2604 Level: intermediate 2605 2606 Notes: 2607 A nonzero block is any block that as 1 or more nonzeros in it 2608 2609 The block AIJ format is fully compatible with standard Fortran 77 2610 storage. That is, the stored row and column indices can begin at 2611 either one (as in Fortran) or zero. See the users' manual for details. 2612 2613 Specify the preallocated storage with either nz or nnz (not both). 2614 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2615 allocation. For additional details, see the users manual chapter on 2616 matrices. 2617 2618 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2619 @*/ 2620 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2621 { 2622 PetscErrorCode ierr; 2623 2624 PetscFunctionBegin; 2625 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2626 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2627 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2628 PetscFunctionReturn(0); 2629 } 2630 2631 #undef __FUNCT__ 2632 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2633 /*@C 2634 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2635 per row in the matrix. For good matrix assembly performance the 2636 user should preallocate the matrix storage by setting the parameter nz 2637 (or the array nnz). By setting these parameters accurately, performance 2638 during matrix assembly can be increased by more than a factor of 50. 2639 2640 Collective on MPI_Comm 2641 2642 Input Parameters: 2643 + A - the matrix 2644 . bs - size of block 2645 . nz - number of block nonzeros per block row (same for all rows) 2646 - nnz - array containing the number of block nonzeros in the various block rows 2647 (possibly different for each block row) or PETSC_NULL 2648 2649 Options Database Keys: 2650 . -mat_no_unroll - uses code that does not unroll the loops in the 2651 block calculations (much slower) 2652 . -mat_block_size - size of the blocks to use 2653 2654 Level: intermediate 2655 2656 Notes: 2657 The block AIJ format is fully compatible with standard Fortran 77 2658 storage. That is, the stored row and column indices can begin at 2659 either one (as in Fortran) or zero. See the users' manual for details. 2660 2661 Specify the preallocated storage with either nz or nnz (not both). 2662 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2663 allocation. For additional details, see the users manual chapter on 2664 matrices. 2665 2666 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2667 @*/ 2668 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2669 { 2670 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 2671 2672 PetscFunctionBegin; 2673 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2674 if (f) { 2675 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2676 } 2677 PetscFunctionReturn(0); 2678 } 2679 2680