1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr); 156 } 157 work = a->mult_work; 158 t = work + k+1; 159 if (!a->sor_work) { 160 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 161 } 162 w = a->sor_work; 163 164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 165 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 166 167 if (flag & SOR_ZERO_INITIAL_GUESS) { 168 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 169 switch (bs) { 170 case 1: 171 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 172 t[0] = b[0]; 173 i2 = 1; 174 idiag += 1; 175 for (i=1; i<m; i++) { 176 v = aa + ai[i]; 177 vi = aj + ai[i]; 178 nz = diag[i] - ai[i]; 179 s[0] = b[i2]; 180 for (j=0; j<nz; j++) { 181 xw[0] = x[vi[j]]; 182 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 183 } 184 t[i2] = s[0]; 185 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 186 x[i2] = xw[0]; 187 idiag += 1; 188 i2 += 1; 189 } 190 break; 191 case 2: 192 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 193 t[0] = b[0]; t[1] = b[1]; 194 i2 = 2; 195 idiag += 4; 196 for (i=1; i<m; i++) { 197 v = aa + 4*ai[i]; 198 vi = aj + ai[i]; 199 nz = diag[i] - ai[i]; 200 s[0] = b[i2]; s[1] = b[i2+1]; 201 for (j=0; j<nz; j++) { 202 idx = 2*vi[j]; 203 it = 4*j; 204 xw[0] = x[idx]; xw[1] = x[1+idx]; 205 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 206 } 207 t[i2] = s[0]; t[i2+1] = s[1]; 208 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 209 x[i2] = xw[0]; x[i2+1] = xw[1]; 210 idiag += 4; 211 i2 += 2; 212 } 213 break; 214 case 3: 215 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 216 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 217 i2 = 3; 218 idiag += 9; 219 for (i=1; i<m; i++) { 220 v = aa + 9*ai[i]; 221 vi = aj + ai[i]; 222 nz = diag[i] - ai[i]; 223 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 224 while (nz--) { 225 idx = 3*(*vi++); 226 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 228 v += 9; 229 } 230 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 231 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 232 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 233 idiag += 9; 234 i2 += 3; 235 } 236 break; 237 case 4: 238 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 239 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 240 i2 = 4; 241 idiag += 16; 242 for (i=1; i<m; i++) { 243 v = aa + 16*ai[i]; 244 vi = aj + ai[i]; 245 nz = diag[i] - ai[i]; 246 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 247 while (nz--) { 248 idx = 4*(*vi++); 249 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 250 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 251 v += 16; 252 } 253 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 254 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 255 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 256 idiag += 16; 257 i2 += 4; 258 } 259 break; 260 case 5: 261 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 262 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 263 i2 = 5; 264 idiag += 25; 265 for (i=1; i<m; i++) { 266 v = aa + 25*ai[i]; 267 vi = aj + ai[i]; 268 nz = diag[i] - ai[i]; 269 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 270 while (nz--) { 271 idx = 5*(*vi++); 272 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 273 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 274 v += 25; 275 } 276 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 277 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 278 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 279 idiag += 25; 280 i2 += 5; 281 } 282 break; 283 case 6: 284 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 285 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 286 i2 = 6; 287 idiag += 36; 288 for (i=1; i<m; i++) { 289 v = aa + 36*ai[i]; 290 vi = aj + ai[i]; 291 nz = diag[i] - ai[i]; 292 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 293 while (nz--) { 294 idx = 6*(*vi++); 295 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 296 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 298 v += 36; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 301 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 302 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 303 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 304 idiag += 36; 305 i2 += 6; 306 } 307 break; 308 case 7: 309 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 310 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 311 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 312 i2 = 7; 313 idiag += 49; 314 for (i=1; i<m; i++) { 315 v = aa + 49*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 319 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 320 while (nz--) { 321 idx = 7*(*vi++); 322 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 323 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 324 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 325 v += 49; 326 } 327 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 328 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 329 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 331 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 332 idiag += 49; 333 i2 += 7; 334 } 335 break; 336 default: 337 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 338 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 339 i2 = bs; 340 idiag += bs2; 341 for (i=1; i<m; i++) { 342 v = aa + bs2*ai[i]; 343 vi = aj + ai[i]; 344 nz = diag[i] - ai[i]; 345 346 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 347 /* copy all rows of x that are needed into contiguous space */ 348 workt = work; 349 for (j=0; j<nz; j++) { 350 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 351 workt += bs; 352 } 353 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 354 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 356 357 idiag += bs2; 358 i2 += bs; 359 } 360 break; 361 } 362 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 363 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 364 xb = t; 365 } 366 else xb = b; 367 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 368 idiag = a->idiag+bs2*(a->mbs-1); 369 i2 = bs * (m-1); 370 switch (bs) { 371 case 1: 372 s[0] = xb[i2]; 373 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 374 x[i2] = xw[0]; 375 i2 -= 1; 376 for (i=m-2; i>=0; i--) { 377 v = aa + (diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s[0] = xb[i2]; 381 for (j=0; j<nz; j++) { 382 xw[0] = x[vi[j]]; 383 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 384 } 385 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 386 x[i2] = xw[0]; 387 idiag -= 1; 388 i2 -= 1; 389 } 390 break; 391 case 2: 392 s[0] = xb[i2]; s[1] = xb[i2+1]; 393 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 394 x[i2] = xw[0]; x[i2+1] = xw[1]; 395 i2 -= 2; 396 idiag -= 4; 397 for (i=m-2; i>=0; i--) { 398 v = aa + 4*(diag[i] + 1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 for (j=0; j<nz; j++) { 403 idx = 2*vi[j]; 404 it = 4*j; 405 xw[0] = x[idx]; xw[1] = x[1+idx]; 406 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 407 } 408 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 409 x[i2] = xw[0]; x[i2+1] = xw[1]; 410 idiag -= 4; 411 i2 -= 2; 412 } 413 break; 414 case 3: 415 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 416 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 417 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 418 i2 -= 3; 419 idiag -= 9; 420 for (i=m-2; i>=0; i--) { 421 v = aa + 9*(diag[i]+1); 422 vi = aj + diag[i] + 1; 423 nz = ai[i+1] - diag[i] - 1; 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 while (nz--) { 426 idx = 3*(*vi++); 427 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 429 v += 9; 430 } 431 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 432 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 433 idiag -= 9; 434 i2 -= 3; 435 } 436 break; 437 case 4: 438 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 439 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 440 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 441 i2 -= 4; 442 idiag -= 16; 443 for (i=m-2; i>=0; i--) { 444 v = aa + 16*(diag[i]+1); 445 vi = aj + diag[i] + 1; 446 nz = ai[i+1] - diag[i] - 1; 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 while (nz--) { 449 idx = 4*(*vi++); 450 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 451 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 452 v += 16; 453 } 454 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 455 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 456 idiag -= 16; 457 i2 -= 4; 458 } 459 break; 460 case 5: 461 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 462 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 463 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 464 i2 -= 5; 465 idiag -= 25; 466 for (i=m-2; i>=0; i--) { 467 v = aa + 25*(diag[i]+1); 468 vi = aj + diag[i] + 1; 469 nz = ai[i+1] - diag[i] - 1; 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 while (nz--) { 472 idx = 5*(*vi++); 473 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 474 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 475 v += 25; 476 } 477 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 478 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 479 idiag -= 25; 480 i2 -= 5; 481 } 482 break; 483 case 6: 484 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 485 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 486 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 487 i2 -= 6; 488 idiag -= 36; 489 for (i=m-2; i>=0; i--) { 490 v = aa + 36*(diag[i]+1); 491 vi = aj + diag[i] + 1; 492 nz = ai[i+1] - diag[i] - 1; 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 while (nz--) { 495 idx = 6*(*vi++); 496 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 497 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 499 v += 36; 500 } 501 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 503 idiag -= 36; 504 i2 -= 6; 505 } 506 break; 507 case 7: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 509 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 510 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 512 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 513 i2 -= 7; 514 idiag -= 49; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 49*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 520 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 521 while (nz--) { 522 idx = 7*(*vi++); 523 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 524 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 525 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 526 v += 49; 527 } 528 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 529 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 530 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 531 idiag -= 49; 532 i2 -= 7; 533 } 534 break; 535 default: 536 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 537 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 538 i2 -= bs; 539 idiag -= bs2; 540 for (i=m-2; i>=0; i--) { 541 v = aa + bs2*(diag[i]+1); 542 vi = aj + diag[i] + 1; 543 nz = ai[i+1] - diag[i] - 1; 544 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 /* copy all rows of x that are needed into contiguous space */ 547 workt = work; 548 for (j=0; j<nz; j++) { 549 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 workt += bs; 551 } 552 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 553 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 554 555 idiag -= bs2; 556 i2 -= bs; 557 } 558 break; 559 } 560 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 561 } 562 its--; 563 } 564 while (its--) { 565 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 566 idiag = a->idiag; 567 i2 = 0; 568 switch (bs) { 569 case 1: 570 for (i=0; i<m; i++) { 571 v = aa + ai[i]; 572 vi = aj + ai[i]; 573 nz = ai[i+1] - ai[i]; 574 s[0] = b[i2]; 575 for (j=0; j<nz; j++) { 576 xw[0] = x[vi[j]]; 577 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 578 } 579 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 580 x[i2] += xw[0]; 581 idiag += 1; 582 i2 += 1; 583 } 584 break; 585 case 2: 586 for (i=0; i<m; i++) { 587 v = aa + 4*ai[i]; 588 vi = aj + ai[i]; 589 nz = ai[i+1] - ai[i]; 590 s[0] = b[i2]; s[1] = b[i2+1]; 591 for (j=0; j<nz; j++) { 592 idx = 2*vi[j]; 593 it = 4*j; 594 xw[0] = x[idx]; xw[1] = x[1+idx]; 595 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 596 } 597 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 598 x[i2] += xw[0]; x[i2+1] += xw[1]; 599 idiag += 4; 600 i2 += 2; 601 } 602 break; 603 case 3: 604 for (i=0; i<m; i++) { 605 v = aa + 9*ai[i]; 606 vi = aj + ai[i]; 607 nz = ai[i+1] - ai[i]; 608 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 609 while (nz--) { 610 idx = 3*(*vi++); 611 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 612 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 613 v += 9; 614 } 615 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 616 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 617 idiag += 9; 618 i2 += 3; 619 } 620 break; 621 case 4: 622 for (i=0; i<m; i++) { 623 v = aa + 16*ai[i]; 624 vi = aj + ai[i]; 625 nz = ai[i+1] - ai[i]; 626 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 627 while (nz--) { 628 idx = 4*(*vi++); 629 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 630 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 631 v += 16; 632 } 633 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 634 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 635 idiag += 16; 636 i2 += 4; 637 } 638 break; 639 case 5: 640 for (i=0; i<m; i++) { 641 v = aa + 25*ai[i]; 642 vi = aj + ai[i]; 643 nz = ai[i+1] - ai[i]; 644 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 645 while (nz--) { 646 idx = 5*(*vi++); 647 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 648 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 649 v += 25; 650 } 651 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 652 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 653 idiag += 25; 654 i2 += 5; 655 } 656 break; 657 case 6: 658 for (i=0; i<m; i++) { 659 v = aa + 36*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 663 while (nz--) { 664 idx = 6*(*vi++); 665 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 666 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 668 v += 36; 669 } 670 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 673 idiag += 36; 674 i2 += 6; 675 } 676 break; 677 case 7: 678 for (i=0; i<m; i++) { 679 v = aa + 49*ai[i]; 680 vi = aj + ai[i]; 681 nz = ai[i+1] - ai[i]; 682 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 683 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 684 while (nz--) { 685 idx = 7*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 689 v += 49; 690 } 691 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 692 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 693 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 694 idiag += 49; 695 i2 += 7; 696 } 697 break; 698 default: 699 for (i=0; i<m; i++) { 700 v = aa + bs2*ai[i]; 701 vi = aj + ai[i]; 702 nz = ai[i+1] - ai[i]; 703 704 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 705 /* copy all rows of x that are needed into contiguous space */ 706 workt = work; 707 for (j=0; j<nz; j++) { 708 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 709 workt += bs; 710 } 711 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 712 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 713 714 idiag += bs2; 715 i2 += bs; 716 } 717 break; 718 } 719 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 720 } 721 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 722 idiag = a->idiag+bs2*(a->mbs-1); 723 i2 = bs * (m-1); 724 switch (bs) { 725 case 1: 726 for (i=m-1; i>=0; i--) { 727 v = aa + ai[i]; 728 vi = aj + ai[i]; 729 nz = ai[i+1] - ai[i]; 730 s[0] = b[i2]; 731 for (j=0; j<nz; j++) { 732 xw[0] = x[vi[j]]; 733 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 734 } 735 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 736 x[i2] += xw[0]; 737 idiag -= 1; 738 i2 -= 1; 739 } 740 break; 741 case 2: 742 for (i=m-1; i>=0; i--) { 743 v = aa + 4*ai[i]; 744 vi = aj + ai[i]; 745 nz = ai[i+1] - ai[i]; 746 s[0] = b[i2]; s[1] = b[i2+1]; 747 for (j=0; j<nz; j++) { 748 idx = 2*vi[j]; 749 it = 4*j; 750 xw[0] = x[idx]; xw[1] = x[1+idx]; 751 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 752 } 753 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 754 x[i2] += xw[0]; x[i2+1] += xw[1]; 755 idiag -= 4; 756 i2 -= 2; 757 } 758 break; 759 case 3: 760 for (i=m-1; i>=0; i--) { 761 v = aa + 9*ai[i]; 762 vi = aj + ai[i]; 763 nz = ai[i+1] - ai[i]; 764 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 765 while (nz--) { 766 idx = 3*(*vi++); 767 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 768 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 769 v += 9; 770 } 771 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 772 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 773 idiag -= 9; 774 i2 -= 3; 775 } 776 break; 777 case 4: 778 for (i=m-1; i>=0; i--) { 779 v = aa + 16*ai[i]; 780 vi = aj + ai[i]; 781 nz = ai[i+1] - ai[i]; 782 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 783 while (nz--) { 784 idx = 4*(*vi++); 785 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 786 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 787 v += 16; 788 } 789 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 790 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 791 idiag -= 16; 792 i2 -= 4; 793 } 794 break; 795 case 5: 796 for (i=m-1; i>=0; i--) { 797 v = aa + 25*ai[i]; 798 vi = aj + ai[i]; 799 nz = ai[i+1] - ai[i]; 800 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 801 while (nz--) { 802 idx = 5*(*vi++); 803 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 804 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 805 v += 25; 806 } 807 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 808 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 809 idiag -= 25; 810 i2 -= 5; 811 } 812 break; 813 case 6: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 36*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 819 while (nz--) { 820 idx = 6*(*vi++); 821 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 822 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 824 v += 36; 825 } 826 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 829 idiag -= 36; 830 i2 -= 6; 831 } 832 break; 833 case 7: 834 for (i=m-1; i>=0; i--) { 835 v = aa + 49*ai[i]; 836 vi = aj + ai[i]; 837 nz = ai[i+1] - ai[i]; 838 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 839 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 844 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 845 v += 49; 846 } 847 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 848 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 849 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 850 idiag -= 49; 851 i2 -= 7; 852 } 853 break; 854 default: 855 for (i=m-1; i>=0; i--) { 856 v = aa + bs2*ai[i]; 857 vi = aj + ai[i]; 858 nz = ai[i+1] - ai[i]; 859 860 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 861 /* copy all rows of x that are needed into contiguous space */ 862 workt = work; 863 for (j=0; j<nz; j++) { 864 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 workt += bs; 866 } 867 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 868 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 869 870 idiag -= bs2; 871 i2 -= bs; 872 } 873 break; 874 } 875 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 876 } 877 } 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 884 /* 885 Special version for direct calls from Fortran (Used in PETSc-fun3d) 886 */ 887 #if defined(PETSC_HAVE_FORTRAN_CAPS) 888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 890 #define matsetvaluesblocked4_ matsetvaluesblocked4 891 #endif 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "matsetvaluesblocked4_" 895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 896 { 897 Mat A = *AA; 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 899 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 900 PetscInt *ai =a->i,*ailen=a->ilen; 901 PetscInt *aj =a->j,stepval,lastcol = -1; 902 const PetscScalar *value = v; 903 MatScalar *ap,*aa = a->a,*bap; 904 905 PetscFunctionBegin; 906 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 907 stepval = (n-1)*4; 908 for (k=0; k<m; k++) { /* loop over added rows */ 909 row = im[k]; 910 rp = aj + ai[row]; 911 ap = aa + 16*ai[row]; 912 nrow = ailen[row]; 913 low = 0; 914 high = nrow; 915 for (l=0; l<n; l++) { /* loop over added columns */ 916 col = in[l]; 917 if (col <= lastcol) low = 0; 918 else high = nrow; 919 lastcol = col; 920 value = v + k*(stepval+4 + l)*4; 921 while (high-low > 7) { 922 t = (low+high)/2; 923 if (rp[t] > col) high = t; 924 else low = t; 925 } 926 for (i=low; i<high; i++) { 927 if (rp[i] > col) break; 928 if (rp[i] == col) { 929 bap = ap + 16*i; 930 for (ii=0; ii<4; ii++,value+=stepval) { 931 for (jj=ii; jj<16; jj+=4) { 932 bap[jj] += *value++; 933 } 934 } 935 goto noinsert2; 936 } 937 } 938 N = nrow++ - 1; 939 high++; /* added new column index thus must search to one higher than before */ 940 /* shift up all the later entries in this row */ 941 for (ii=N; ii>=i; ii--) { 942 rp[ii+1] = rp[ii]; 943 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 944 } 945 if (N >= i) { 946 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 947 } 948 rp[i] = col; 949 bap = ap + 16*i; 950 for (ii=0; ii<4; ii++,value+=stepval) { 951 for (jj=ii; jj<16; jj+=4) { 952 bap[jj] = *value++; 953 } 954 } 955 noinsert2:; 956 low = i; 957 } 958 ailen[row] = nrow; 959 } 960 PetscFunctionReturnVoid(); 961 } 962 963 #if defined(PETSC_HAVE_FORTRAN_CAPS) 964 #define matsetvalues4_ MATSETVALUES4 965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 966 #define matsetvalues4_ matsetvalues4 967 #endif 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSetValues4_" 971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 972 { 973 Mat A = *AA; 974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 975 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 976 PetscInt *ai=a->i,*ailen=a->ilen; 977 PetscInt *aj=a->j,brow,bcol; 978 PetscInt ridx,cidx,lastcol = -1; 979 MatScalar *ap,value,*aa=a->a,*bap; 980 981 PetscFunctionBegin; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; brow = row/4; 984 rp = aj + ai[brow]; 985 ap = aa + 16*ai[brow]; 986 nrow = ailen[brow]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; bcol = col/4; 991 ridx = row % 4; cidx = col % 4; 992 value = v[l + k*n]; 993 if (col <= lastcol) low = 0; 994 else high = nrow; 995 lastcol = col; 996 while (high-low > 7) { 997 t = (low+high)/2; 998 if (rp[t] > bcol) high = t; 999 else low = t; 1000 } 1001 for (i=low; i<high; i++) { 1002 if (rp[i] > bcol) break; 1003 if (rp[i] == bcol) { 1004 bap = ap + 16*i + 4*cidx + ridx; 1005 *bap += value; 1006 goto noinsert1; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N>=i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = bcol; 1020 ap[16*i + 4*cidx + ridx] = value; 1021 noinsert1:; 1022 low = i; 1023 } 1024 ailen[brow] = nrow; 1025 } 1026 PetscFunctionReturnVoid(); 1027 } 1028 1029 /* 1030 Checks for missing diagonals 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*ii = a->i,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !ii) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1047 } else { 1048 diag = a->diag; 1049 for (i=0; i<a->mbs; i++) { 1050 if (diag[i] >= ii[i+1]) { 1051 *missing = PETSC_TRUE; 1052 if (d) *d = i; 1053 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1054 break; 1055 } 1056 } 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = 0; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1176 { 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (!ia) PetscFunctionReturn(0); 1181 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1182 ierr = PetscFree(*ia);CHKERRQ(ierr); 1183 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1191 { 1192 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1198 #endif 1199 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1200 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1201 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1202 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1203 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1204 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1205 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1206 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1207 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1208 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1209 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1210 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1211 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1212 1213 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1214 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1215 ierr = PetscFree(A->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1234 { 1235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 switch (op) { 1240 case MAT_ROW_ORIENTED: 1241 a->roworiented = flg; 1242 break; 1243 case MAT_KEEP_NONZERO_PATTERN: 1244 a->keepnonzeropattern = flg; 1245 break; 1246 case MAT_NEW_NONZERO_LOCATIONS: 1247 a->nonew = (flg ? 0 : 1); 1248 break; 1249 case MAT_NEW_NONZERO_LOCATION_ERR: 1250 a->nonew = (flg ? -1 : 0); 1251 break; 1252 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1253 a->nonew = (flg ? -2 : 0); 1254 break; 1255 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1256 a->nounused = (flg ? -1 : 0); 1257 break; 1258 case MAT_NEW_DIAGONALS: 1259 case MAT_IGNORE_OFF_PROC_ENTRIES: 1260 case MAT_USE_HASH_TABLE: 1261 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1262 break; 1263 case MAT_SPD: 1264 case MAT_SYMMETRIC: 1265 case MAT_STRUCTURALLY_SYMMETRIC: 1266 case MAT_HERMITIAN: 1267 case MAT_SYMMETRY_ETERNAL: 1268 /* These options are handled directly by MatSetOption() */ 1269 break; 1270 default: 1271 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1280 { 1281 PetscErrorCode ierr; 1282 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1283 MatScalar *aa_i; 1284 PetscScalar *v_i; 1285 1286 PetscFunctionBegin; 1287 bs = A->rmap->bs; 1288 bs2 = bs*bs; 1289 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1290 1291 bn = row/bs; /* Block number */ 1292 bp = row % bs; /* Block Position */ 1293 M = ai[bn+1] - ai[bn]; 1294 *nz = bs*M; 1295 1296 if (v) { 1297 *v = 0; 1298 if (*nz) { 1299 ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr); 1300 for (i=0; i<M; i++) { /* for each block in the block row */ 1301 v_i = *v + i*bs; 1302 aa_i = aa + bs2*(ai[bn] + i); 1303 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1304 } 1305 } 1306 } 1307 1308 if (idx) { 1309 *idx = 0; 1310 if (*nz) { 1311 ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr); 1312 for (i=0; i<M; i++) { /* for each block in the block row */ 1313 idx_i = *idx + i*bs; 1314 itmp = bs*aj[ai[bn] + i]; 1315 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1325 { 1326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1336 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1337 { 1338 PetscErrorCode ierr; 1339 1340 PetscFunctionBegin; 1341 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1342 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1343 PetscFunctionReturn(0); 1344 } 1345 1346 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1347 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1350 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1351 { 1352 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1353 Mat C; 1354 PetscErrorCode ierr; 1355 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1356 PetscInt *rows,*cols,bs2=a->bs2; 1357 MatScalar *array; 1358 1359 PetscFunctionBegin; 1360 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1361 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1362 ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr); 1363 1364 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1365 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1366 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1367 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1368 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1369 ierr = PetscFree(col);CHKERRQ(ierr); 1370 } else { 1371 C = *B; 1372 } 1373 1374 array = a->a; 1375 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1376 for (i=0; i<mbs; i++) { 1377 cols[0] = i*bs; 1378 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1379 len = ai[i+1] - ai[i]; 1380 for (j=0; j<len; j++) { 1381 rows[0] = (*aj++)*bs; 1382 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1383 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1384 array += bs2; 1385 } 1386 } 1387 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1388 1389 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 1392 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1393 *B = C; 1394 } else { 1395 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1396 } 1397 PetscFunctionReturn(0); 1398 } 1399 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1402 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1403 { 1404 PetscErrorCode ierr; 1405 Mat Btrans; 1406 1407 PetscFunctionBegin; 1408 *f = PETSC_FALSE; 1409 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1410 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1411 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1417 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1418 { 1419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1420 PetscErrorCode ierr; 1421 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1422 int fd; 1423 PetscScalar *aa; 1424 FILE *file; 1425 1426 PetscFunctionBegin; 1427 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1428 ierr = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr); 1429 col_lens[0] = MAT_FILE_CLASSID; 1430 1431 col_lens[1] = A->rmap->N; 1432 col_lens[2] = A->cmap->n; 1433 col_lens[3] = a->nz*bs2; 1434 1435 /* store lengths of each row and write (including header) to file */ 1436 count = 0; 1437 for (i=0; i<a->mbs; i++) { 1438 for (j=0; j<bs; j++) { 1439 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1440 } 1441 } 1442 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1443 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1444 1445 /* store column indices (zero start index) */ 1446 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1447 count = 0; 1448 for (i=0; i<a->mbs; i++) { 1449 for (j=0; j<bs; j++) { 1450 for (k=a->i[i]; k<a->i[i+1]; k++) { 1451 for (l=0; l<bs; l++) { 1452 jj[count++] = bs*a->j[k] + l; 1453 } 1454 } 1455 } 1456 } 1457 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1458 ierr = PetscFree(jj);CHKERRQ(ierr); 1459 1460 /* store nonzero values */ 1461 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1462 count = 0; 1463 for (i=0; i<a->mbs; i++) { 1464 for (j=0; j<bs; j++) { 1465 for (k=a->i[i]; k<a->i[i+1]; k++) { 1466 for (l=0; l<bs; l++) { 1467 aa[count++] = a->a[bs2*k + l*bs + j]; 1468 } 1469 } 1470 } 1471 } 1472 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1473 ierr = PetscFree(aa);CHKERRQ(ierr); 1474 1475 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1476 if (file) { 1477 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1484 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1485 { 1486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1487 PetscErrorCode ierr; 1488 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1489 PetscViewerFormat format; 1490 1491 PetscFunctionBegin; 1492 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1493 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1494 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1495 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1496 Mat aij; 1497 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1498 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1499 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1500 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1501 PetscFunctionReturn(0); 1502 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1503 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1504 for (i=0; i<a->mbs; i++) { 1505 for (j=0; j<bs; j++) { 1506 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1507 for (k=a->i[i]; k<a->i[i+1]; k++) { 1508 for (l=0; l<bs; l++) { 1509 #if defined(PETSC_USE_COMPLEX) 1510 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1511 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1512 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1513 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1514 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1515 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1516 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1517 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1518 } 1519 #else 1520 if (a->a[bs2*k + l*bs + j] != 0.0) { 1521 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1522 } 1523 #endif 1524 } 1525 } 1526 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1527 } 1528 } 1529 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1530 } else { 1531 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1532 for (i=0; i<a->mbs; i++) { 1533 for (j=0; j<bs; j++) { 1534 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1535 for (k=a->i[i]; k<a->i[i+1]; k++) { 1536 for (l=0; l<bs; l++) { 1537 #if defined(PETSC_USE_COMPLEX) 1538 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1540 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1542 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1543 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1544 } else { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1546 } 1547 #else 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1549 #endif 1550 } 1551 } 1552 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1553 } 1554 } 1555 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1556 } 1557 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #include <petscdraw.h> 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1564 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1565 { 1566 Mat A = (Mat) Aa; 1567 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1568 PetscErrorCode ierr; 1569 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1570 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1571 MatScalar *aa; 1572 PetscViewer viewer; 1573 PetscViewerFormat format; 1574 1575 PetscFunctionBegin; 1576 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1577 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1578 1579 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1580 1581 /* loop over matrix elements drawing boxes */ 1582 1583 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1584 color = PETSC_DRAW_BLUE; 1585 for (i=0,row=0; i<mbs; i++,row+=bs) { 1586 for (j=a->i[i]; j<a->i[i+1]; j++) { 1587 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1588 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1589 aa = a->a + j*bs2; 1590 for (k=0; k<bs; k++) { 1591 for (l=0; l<bs; l++) { 1592 if (PetscRealPart(*aa++) >= 0.) continue; 1593 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1594 } 1595 } 1596 } 1597 } 1598 color = PETSC_DRAW_CYAN; 1599 for (i=0,row=0; i<mbs; i++,row+=bs) { 1600 for (j=a->i[i]; j<a->i[i+1]; j++) { 1601 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1602 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1603 aa = a->a + j*bs2; 1604 for (k=0; k<bs; k++) { 1605 for (l=0; l<bs; l++) { 1606 if (PetscRealPart(*aa++) != 0.) continue; 1607 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1608 } 1609 } 1610 } 1611 } 1612 color = PETSC_DRAW_RED; 1613 for (i=0,row=0; i<mbs; i++,row+=bs) { 1614 for (j=a->i[i]; j<a->i[i+1]; j++) { 1615 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1616 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1617 aa = a->a + j*bs2; 1618 for (k=0; k<bs; k++) { 1619 for (l=0; l<bs; l++) { 1620 if (PetscRealPart(*aa++) <= 0.) continue; 1621 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1622 } 1623 } 1624 } 1625 } 1626 } else { 1627 /* use contour shading to indicate magnitude of values */ 1628 /* first determine max of all nonzero values */ 1629 PetscDraw popup; 1630 PetscReal scale,maxv = 0.0; 1631 1632 for (i=0; i<a->nz*a->bs2; i++) { 1633 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1634 } 1635 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1636 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1637 if (popup) { 1638 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1639 } 1640 for (i=0,row=0; i<mbs; i++,row+=bs) { 1641 for (j=a->i[i]; j<a->i[i+1]; j++) { 1642 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1643 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1644 aa = a->a + j*bs2; 1645 for (k=0; k<bs; k++) { 1646 for (l=0; l<bs; l++) { 1647 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1648 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1649 } 1650 } 1651 } 1652 } 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1659 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1660 { 1661 PetscErrorCode ierr; 1662 PetscReal xl,yl,xr,yr,w,h; 1663 PetscDraw draw; 1664 PetscBool isnull; 1665 1666 PetscFunctionBegin; 1667 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1668 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1669 1670 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1671 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1672 xr += w; yr += h; xl = -w; yl = -h; 1673 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1674 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1675 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatView_SeqBAIJ" 1681 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1682 { 1683 PetscErrorCode ierr; 1684 PetscBool iascii,isbinary,isdraw; 1685 1686 PetscFunctionBegin; 1687 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1688 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1689 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1690 if (iascii) { 1691 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1692 } else if (isbinary) { 1693 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1694 } else if (isdraw) { 1695 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1696 } else { 1697 Mat B; 1698 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1699 ierr = MatView(B,viewer);CHKERRQ(ierr); 1700 ierr = MatDestroy(&B);CHKERRQ(ierr); 1701 } 1702 PetscFunctionReturn(0); 1703 } 1704 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1708 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1709 { 1710 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1711 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1712 PetscInt *ai = a->i,*ailen = a->ilen; 1713 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1714 MatScalar *ap,*aa = a->a; 1715 1716 PetscFunctionBegin; 1717 for (k=0; k<m; k++) { /* loop over rows */ 1718 row = im[k]; brow = row/bs; 1719 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1720 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1721 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1722 nrow = ailen[brow]; 1723 for (l=0; l<n; l++) { /* loop over columns */ 1724 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1725 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1726 col = in[l]; 1727 bcol = col/bs; 1728 cidx = col%bs; 1729 ridx = row%bs; 1730 high = nrow; 1731 low = 0; /* assume unsorted */ 1732 while (high-low > 5) { 1733 t = (low+high)/2; 1734 if (rp[t] > bcol) high = t; 1735 else low = t; 1736 } 1737 for (i=low; i<high; i++) { 1738 if (rp[i] > bcol) break; 1739 if (rp[i] == bcol) { 1740 *v++ = ap[bs2*i+bs*cidx+ridx]; 1741 goto finished; 1742 } 1743 } 1744 *v++ = 0.0; 1745 finished:; 1746 } 1747 } 1748 PetscFunctionReturn(0); 1749 } 1750 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1753 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1754 { 1755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1756 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1757 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1758 PetscErrorCode ierr; 1759 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1760 PetscBool roworiented=a->roworiented; 1761 const PetscScalar *value = v; 1762 MatScalar *ap,*aa = a->a,*bap; 1763 1764 PetscFunctionBegin; 1765 if (roworiented) { 1766 stepval = (n-1)*bs; 1767 } else { 1768 stepval = (m-1)*bs; 1769 } 1770 for (k=0; k<m; k++) { /* loop over added rows */ 1771 row = im[k]; 1772 if (row < 0) continue; 1773 #if defined(PETSC_USE_DEBUG) 1774 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1775 #endif 1776 rp = aj + ai[row]; 1777 ap = aa + bs2*ai[row]; 1778 rmax = imax[row]; 1779 nrow = ailen[row]; 1780 low = 0; 1781 high = nrow; 1782 for (l=0; l<n; l++) { /* loop over added columns */ 1783 if (in[l] < 0) continue; 1784 #if defined(PETSC_USE_DEBUG) 1785 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1786 #endif 1787 col = in[l]; 1788 if (roworiented) { 1789 value = v + (k*(stepval+bs) + l)*bs; 1790 } else { 1791 value = v + (l*(stepval+bs) + k)*bs; 1792 } 1793 if (col <= lastcol) low = 0; 1794 else high = nrow; 1795 lastcol = col; 1796 while (high-low > 7) { 1797 t = (low+high)/2; 1798 if (rp[t] > col) high = t; 1799 else low = t; 1800 } 1801 for (i=low; i<high; i++) { 1802 if (rp[i] > col) break; 1803 if (rp[i] == col) { 1804 bap = ap + bs2*i; 1805 if (roworiented) { 1806 if (is == ADD_VALUES) { 1807 for (ii=0; ii<bs; ii++,value+=stepval) { 1808 for (jj=ii; jj<bs2; jj+=bs) { 1809 bap[jj] += *value++; 1810 } 1811 } 1812 } else { 1813 for (ii=0; ii<bs; ii++,value+=stepval) { 1814 for (jj=ii; jj<bs2; jj+=bs) { 1815 bap[jj] = *value++; 1816 } 1817 } 1818 } 1819 } else { 1820 if (is == ADD_VALUES) { 1821 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1822 for (jj=0; jj<bs; jj++) { 1823 bap[jj] += value[jj]; 1824 } 1825 bap += bs; 1826 } 1827 } else { 1828 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1829 for (jj=0; jj<bs; jj++) { 1830 bap[jj] = value[jj]; 1831 } 1832 bap += bs; 1833 } 1834 } 1835 } 1836 goto noinsert2; 1837 } 1838 } 1839 if (nonew == 1) goto noinsert2; 1840 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1841 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1842 N = nrow++ - 1; high++; 1843 /* shift up all the later entries in this row */ 1844 for (ii=N; ii>=i; ii--) { 1845 rp[ii+1] = rp[ii]; 1846 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1847 } 1848 if (N >= i) { 1849 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1850 } 1851 rp[i] = col; 1852 bap = ap + bs2*i; 1853 if (roworiented) { 1854 for (ii=0; ii<bs; ii++,value+=stepval) { 1855 for (jj=ii; jj<bs2; jj+=bs) { 1856 bap[jj] = *value++; 1857 } 1858 } 1859 } else { 1860 for (ii=0; ii<bs; ii++,value+=stepval) { 1861 for (jj=0; jj<bs; jj++) { 1862 *bap++ = *value++; 1863 } 1864 } 1865 } 1866 noinsert2:; 1867 low = i; 1868 } 1869 ailen[row] = nrow; 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1876 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1877 { 1878 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1879 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1880 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1881 PetscErrorCode ierr; 1882 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1883 MatScalar *aa = a->a,*ap; 1884 PetscReal ratio=0.6; 1885 1886 PetscFunctionBegin; 1887 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1888 1889 if (m) rmax = ailen[0]; 1890 for (i=1; i<mbs; i++) { 1891 /* move each row back by the amount of empty slots (fshift) before it*/ 1892 fshift += imax[i-1] - ailen[i-1]; 1893 rmax = PetscMax(rmax,ailen[i]); 1894 if (fshift) { 1895 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1896 N = ailen[i]; 1897 for (j=0; j<N; j++) { 1898 ip[j-fshift] = ip[j]; 1899 1900 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1901 } 1902 } 1903 ai[i] = ai[i-1] + ailen[i-1]; 1904 } 1905 if (mbs) { 1906 fshift += imax[mbs-1] - ailen[mbs-1]; 1907 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1908 } 1909 1910 /* reset ilen and imax for each row */ 1911 a->nonzerorowcnt = 0; 1912 for (i=0; i<mbs; i++) { 1913 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1914 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1915 } 1916 a->nz = ai[mbs]; 1917 1918 /* diagonals may have moved, so kill the diagonal pointers */ 1919 a->idiagvalid = PETSC_FALSE; 1920 if (fshift && a->diag) { 1921 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1922 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1923 a->diag = 0; 1924 } 1925 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1926 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1927 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1928 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1929 1930 A->info.mallocs += a->reallocs; 1931 a->reallocs = 0; 1932 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1933 a->rmax = rmax; 1934 1935 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 /* 1940 This function returns an array of flags which indicate the locations of contiguous 1941 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1942 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1943 Assume: sizes should be long enough to hold all the values. 1944 */ 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1947 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1948 { 1949 PetscInt i,j,k,row; 1950 PetscBool flg; 1951 1952 PetscFunctionBegin; 1953 for (i=0,j=0; i<n; j++) { 1954 row = idx[i]; 1955 if (row%bs!=0) { /* Not the begining of a block */ 1956 sizes[j] = 1; 1957 i++; 1958 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1959 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1960 i++; 1961 } else { /* Begining of the block, so check if the complete block exists */ 1962 flg = PETSC_TRUE; 1963 for (k=1; k<bs; k++) { 1964 if (row+k != idx[i+k]) { /* break in the block */ 1965 flg = PETSC_FALSE; 1966 break; 1967 } 1968 } 1969 if (flg) { /* No break in the bs */ 1970 sizes[j] = bs; 1971 i += bs; 1972 } else { 1973 sizes[j] = 1; 1974 i++; 1975 } 1976 } 1977 } 1978 *bs_max = j; 1979 PetscFunctionReturn(0); 1980 } 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1984 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1985 { 1986 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1987 PetscErrorCode ierr; 1988 PetscInt i,j,k,count,*rows; 1989 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1990 PetscScalar zero = 0.0; 1991 MatScalar *aa; 1992 const PetscScalar *xx; 1993 PetscScalar *bb; 1994 1995 PetscFunctionBegin; 1996 /* fix right hand side if needed */ 1997 if (x && b) { 1998 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1999 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2000 for (i=0; i<is_n; i++) { 2001 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2002 } 2003 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2004 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2005 } 2006 2007 /* Make a copy of the IS and sort it */ 2008 /* allocate memory for rows,sizes */ 2009 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2010 2011 /* copy IS values to rows, and sort them */ 2012 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2013 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2014 2015 if (baij->keepnonzeropattern) { 2016 for (i=0; i<is_n; i++) sizes[i] = 1; 2017 bs_max = is_n; 2018 } else { 2019 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2020 A->nonzerostate++; 2021 } 2022 2023 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2024 row = rows[j]; 2025 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2026 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2027 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2028 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2029 if (diag != (PetscScalar)0.0) { 2030 if (baij->ilen[row/bs] > 0) { 2031 baij->ilen[row/bs] = 1; 2032 baij->j[baij->i[row/bs]] = row/bs; 2033 2034 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2035 } 2036 /* Now insert all the diagonal values for this bs */ 2037 for (k=0; k<bs; k++) { 2038 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2039 } 2040 } else { /* (diag == 0.0) */ 2041 baij->ilen[row/bs] = 0; 2042 } /* end (diag == 0.0) */ 2043 } else { /* (sizes[i] != bs) */ 2044 #if defined(PETSC_USE_DEBUG) 2045 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2046 #endif 2047 for (k=0; k<count; k++) { 2048 aa[0] = zero; 2049 aa += bs; 2050 } 2051 if (diag != (PetscScalar)0.0) { 2052 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2053 } 2054 } 2055 } 2056 2057 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2058 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2059 PetscFunctionReturn(0); 2060 } 2061 2062 #undef __FUNCT__ 2063 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2064 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2065 { 2066 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2067 PetscErrorCode ierr; 2068 PetscInt i,j,k,count; 2069 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2070 PetscScalar zero = 0.0; 2071 MatScalar *aa; 2072 const PetscScalar *xx; 2073 PetscScalar *bb; 2074 PetscBool *zeroed,vecs = PETSC_FALSE; 2075 2076 PetscFunctionBegin; 2077 /* fix right hand side if needed */ 2078 if (x && b) { 2079 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2080 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2081 vecs = PETSC_TRUE; 2082 } 2083 2084 /* zero the columns */ 2085 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2086 for (i=0; i<is_n; i++) { 2087 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2088 zeroed[is_idx[i]] = PETSC_TRUE; 2089 } 2090 for (i=0; i<A->rmap->N; i++) { 2091 if (!zeroed[i]) { 2092 row = i/bs; 2093 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2094 for (k=0; k<bs; k++) { 2095 col = bs*baij->j[j] + k; 2096 if (zeroed[col]) { 2097 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2098 if (vecs) bb[i] -= aa[0]*xx[col]; 2099 aa[0] = 0.0; 2100 } 2101 } 2102 } 2103 } else if (vecs) bb[i] = diag*xx[i]; 2104 } 2105 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2106 if (vecs) { 2107 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2108 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2109 } 2110 2111 /* zero the rows */ 2112 for (i=0; i<is_n; i++) { 2113 row = is_idx[i]; 2114 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2115 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2116 for (k=0; k<count; k++) { 2117 aa[0] = zero; 2118 aa += bs; 2119 } 2120 if (diag != (PetscScalar)0.0) { 2121 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2122 } 2123 } 2124 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2130 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2131 { 2132 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2133 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2134 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2135 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2136 PetscErrorCode ierr; 2137 PetscInt ridx,cidx,bs2=a->bs2; 2138 PetscBool roworiented=a->roworiented; 2139 MatScalar *ap,value,*aa=a->a,*bap; 2140 2141 PetscFunctionBegin; 2142 for (k=0; k<m; k++) { /* loop over added rows */ 2143 row = im[k]; 2144 brow = row/bs; 2145 if (row < 0) continue; 2146 #if defined(PETSC_USE_DEBUG) 2147 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2148 #endif 2149 rp = aj + ai[brow]; 2150 ap = aa + bs2*ai[brow]; 2151 rmax = imax[brow]; 2152 nrow = ailen[brow]; 2153 low = 0; 2154 high = nrow; 2155 for (l=0; l<n; l++) { /* loop over added columns */ 2156 if (in[l] < 0) continue; 2157 #if defined(PETSC_USE_DEBUG) 2158 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2159 #endif 2160 col = in[l]; bcol = col/bs; 2161 ridx = row % bs; cidx = col % bs; 2162 if (roworiented) { 2163 value = v[l + k*n]; 2164 } else { 2165 value = v[k + l*m]; 2166 } 2167 if (col <= lastcol) low = 0; else high = nrow; 2168 lastcol = col; 2169 while (high-low > 7) { 2170 t = (low+high)/2; 2171 if (rp[t] > bcol) high = t; 2172 else low = t; 2173 } 2174 for (i=low; i<high; i++) { 2175 if (rp[i] > bcol) break; 2176 if (rp[i] == bcol) { 2177 bap = ap + bs2*i + bs*cidx + ridx; 2178 if (is == ADD_VALUES) *bap += value; 2179 else *bap = value; 2180 goto noinsert1; 2181 } 2182 } 2183 if (nonew == 1) goto noinsert1; 2184 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2185 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2186 N = nrow++ - 1; high++; 2187 /* shift up all the later entries in this row */ 2188 for (ii=N; ii>=i; ii--) { 2189 rp[ii+1] = rp[ii]; 2190 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2191 } 2192 if (N>=i) { 2193 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2194 } 2195 rp[i] = bcol; 2196 ap[bs2*i + bs*cidx + ridx] = value; 2197 a->nz++; 2198 A->nonzerostate++; 2199 noinsert1:; 2200 low = i; 2201 } 2202 ailen[brow] = nrow; 2203 } 2204 PetscFunctionReturn(0); 2205 } 2206 2207 #undef __FUNCT__ 2208 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2209 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2210 { 2211 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2212 Mat outA; 2213 PetscErrorCode ierr; 2214 PetscBool row_identity,col_identity; 2215 2216 PetscFunctionBegin; 2217 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2218 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2219 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2220 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2221 2222 outA = inA; 2223 inA->factortype = MAT_FACTOR_LU; 2224 2225 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2226 2227 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2228 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2229 a->row = row; 2230 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2231 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2232 a->col = col; 2233 2234 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2235 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2236 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2237 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2238 2239 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2240 if (!a->solve_work) { 2241 ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr); 2242 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2243 } 2244 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2250 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2251 { 2252 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2253 PetscInt i,nz,mbs; 2254 2255 PetscFunctionBegin; 2256 nz = baij->maxnz; 2257 mbs = baij->mbs; 2258 for (i=0; i<nz; i++) { 2259 baij->j[i] = indices[i]; 2260 } 2261 baij->nz = nz; 2262 for (i=0; i<mbs; i++) { 2263 baij->ilen[i] = baij->imax[i]; 2264 } 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2270 /*@ 2271 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2272 in the matrix. 2273 2274 Input Parameters: 2275 + mat - the SeqBAIJ matrix 2276 - indices - the column indices 2277 2278 Level: advanced 2279 2280 Notes: 2281 This can be called if you have precomputed the nonzero structure of the 2282 matrix and want to provide it to the matrix object to improve the performance 2283 of the MatSetValues() operation. 2284 2285 You MUST have set the correct numbers of nonzeros per row in the call to 2286 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2287 2288 MUST be called before any calls to MatSetValues(); 2289 2290 @*/ 2291 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2292 { 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2297 PetscValidPointer(indices,2); 2298 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2304 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2305 { 2306 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2309 PetscReal atmp; 2310 PetscScalar *x,zero = 0.0; 2311 MatScalar *aa; 2312 PetscInt ncols,brow,krow,kcol; 2313 2314 PetscFunctionBegin; 2315 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2316 bs = A->rmap->bs; 2317 aa = a->a; 2318 ai = a->i; 2319 aj = a->j; 2320 mbs = a->mbs; 2321 2322 ierr = VecSet(v,zero);CHKERRQ(ierr); 2323 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2324 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2325 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2326 for (i=0; i<mbs; i++) { 2327 ncols = ai[1] - ai[0]; ai++; 2328 brow = bs*i; 2329 for (j=0; j<ncols; j++) { 2330 for (kcol=0; kcol<bs; kcol++) { 2331 for (krow=0; krow<bs; krow++) { 2332 atmp = PetscAbsScalar(*aa);aa++; 2333 row = brow + krow; /* row index */ 2334 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2335 } 2336 } 2337 aj++; 2338 } 2339 } 2340 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatCopy_SeqBAIJ" 2346 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2347 { 2348 PetscErrorCode ierr; 2349 2350 PetscFunctionBegin; 2351 /* If the two matrices have the same copy implementation, use fast copy. */ 2352 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2354 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2355 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2356 2357 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2358 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2359 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2360 } else { 2361 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2362 } 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2368 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2369 { 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2379 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2380 { 2381 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2382 2383 PetscFunctionBegin; 2384 *array = a->a; 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2390 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2391 { 2392 PetscFunctionBegin; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2398 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2399 { 2400 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2401 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2402 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2403 PetscErrorCode ierr; 2404 2405 PetscFunctionBegin; 2406 /* Set the number of nonzeros in the new matrix */ 2407 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2413 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2414 { 2415 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2416 PetscErrorCode ierr; 2417 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2418 PetscBLASInt one=1; 2419 2420 PetscFunctionBegin; 2421 if (str == SAME_NONZERO_PATTERN) { 2422 PetscScalar alpha = a; 2423 PetscBLASInt bnz; 2424 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2425 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2426 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2427 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2428 if (y->xtoy && y->XtoY != X) { 2429 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2430 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2431 } 2432 if (!y->xtoy) { /* get xtoy */ 2433 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2434 y->XtoY = X; 2435 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2436 } 2437 for (i=0; i<x->nz; i++) { 2438 j = 0; 2439 while (j < bs2) { 2440 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2441 j++; 2442 } 2443 } 2444 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2445 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(double)((PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2446 } else { 2447 Mat B; 2448 PetscInt *nnz; 2449 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2450 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2451 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2452 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2453 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2454 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2455 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2456 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2457 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2458 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2459 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2460 ierr = PetscFree(nnz);CHKERRQ(ierr); 2461 } 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2467 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2468 { 2469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2470 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2471 MatScalar *aa = a->a; 2472 2473 PetscFunctionBegin; 2474 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2480 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2484 MatScalar *aa = a->a; 2485 2486 PetscFunctionBegin; 2487 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2488 PetscFunctionReturn(0); 2489 } 2490 2491 #undef __FUNCT__ 2492 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2493 /* 2494 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2495 */ 2496 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2497 { 2498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2499 PetscErrorCode ierr; 2500 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2501 PetscInt nz = a->i[m],row,*jj,mr,col; 2502 2503 PetscFunctionBegin; 2504 *nn = n; 2505 if (!ia) PetscFunctionReturn(0); 2506 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2507 else { 2508 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2509 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2510 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2511 jj = a->j; 2512 for (i=0; i<nz; i++) { 2513 collengths[jj[i]]++; 2514 } 2515 cia[0] = oshift; 2516 for (i=0; i<n; i++) { 2517 cia[i+1] = cia[i] + collengths[i]; 2518 } 2519 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2520 jj = a->j; 2521 for (row=0; row<m; row++) { 2522 mr = a->i[row+1] - a->i[row]; 2523 for (i=0; i<mr; i++) { 2524 col = *jj++; 2525 2526 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2527 } 2528 } 2529 ierr = PetscFree(collengths);CHKERRQ(ierr); 2530 *ia = cia; *ja = cja; 2531 } 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2537 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2538 { 2539 PetscErrorCode ierr; 2540 2541 PetscFunctionBegin; 2542 if (!ia) PetscFunctionReturn(0); 2543 ierr = PetscFree(*ia);CHKERRQ(ierr); 2544 ierr = PetscFree(*ja);CHKERRQ(ierr); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 /* 2549 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2550 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2551 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2552 */ 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2555 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2556 { 2557 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2558 PetscErrorCode ierr; 2559 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2560 PetscInt nz = a->i[m],row,*jj,mr,col; 2561 PetscInt *cspidx; 2562 2563 PetscFunctionBegin; 2564 *nn = n; 2565 if (!ia) PetscFunctionReturn(0); 2566 2567 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2568 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2569 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2570 ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr); 2571 jj = a->j; 2572 for (i=0; i<nz; i++) { 2573 collengths[jj[i]]++; 2574 } 2575 cia[0] = oshift; 2576 for (i=0; i<n; i++) { 2577 cia[i+1] = cia[i] + collengths[i]; 2578 } 2579 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2580 jj = a->j; 2581 for (row=0; row<m; row++) { 2582 mr = a->i[row+1] - a->i[row]; 2583 for (i=0; i<mr; i++) { 2584 col = *jj++; 2585 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2586 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2587 } 2588 } 2589 ierr = PetscFree(collengths);CHKERRQ(ierr); 2590 *ia = cia; *ja = cja; 2591 *spidx = cspidx; 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2597 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2598 { 2599 PetscErrorCode ierr; 2600 2601 PetscFunctionBegin; 2602 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2603 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2604 PetscFunctionReturn(0); 2605 } 2606 2607 /* -------------------------------------------------------------------*/ 2608 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2609 MatGetRow_SeqBAIJ, 2610 MatRestoreRow_SeqBAIJ, 2611 MatMult_SeqBAIJ_N, 2612 /* 4*/ MatMultAdd_SeqBAIJ_N, 2613 MatMultTranspose_SeqBAIJ, 2614 MatMultTransposeAdd_SeqBAIJ, 2615 0, 2616 0, 2617 0, 2618 /* 10*/ 0, 2619 MatLUFactor_SeqBAIJ, 2620 0, 2621 0, 2622 MatTranspose_SeqBAIJ, 2623 /* 15*/ MatGetInfo_SeqBAIJ, 2624 MatEqual_SeqBAIJ, 2625 MatGetDiagonal_SeqBAIJ, 2626 MatDiagonalScale_SeqBAIJ, 2627 MatNorm_SeqBAIJ, 2628 /* 20*/ 0, 2629 MatAssemblyEnd_SeqBAIJ, 2630 MatSetOption_SeqBAIJ, 2631 MatZeroEntries_SeqBAIJ, 2632 /* 24*/ MatZeroRows_SeqBAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /* 29*/ MatSetUp_SeqBAIJ, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /* 34*/ MatDuplicate_SeqBAIJ, 2643 0, 2644 0, 2645 MatILUFactor_SeqBAIJ, 2646 0, 2647 /* 39*/ MatAXPY_SeqBAIJ, 2648 MatGetSubMatrices_SeqBAIJ, 2649 MatIncreaseOverlap_SeqBAIJ, 2650 MatGetValues_SeqBAIJ, 2651 MatCopy_SeqBAIJ, 2652 /* 44*/ 0, 2653 MatScale_SeqBAIJ, 2654 0, 2655 0, 2656 MatZeroRowsColumns_SeqBAIJ, 2657 /* 49*/ 0, 2658 MatGetRowIJ_SeqBAIJ, 2659 MatRestoreRowIJ_SeqBAIJ, 2660 MatGetColumnIJ_SeqBAIJ, 2661 MatRestoreColumnIJ_SeqBAIJ, 2662 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2663 0, 2664 0, 2665 0, 2666 MatSetValuesBlocked_SeqBAIJ, 2667 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2668 MatDestroy_SeqBAIJ, 2669 MatView_SeqBAIJ, 2670 0, 2671 0, 2672 /* 64*/ 0, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2678 0, 2679 MatConvert_Basic, 2680 0, 2681 0, 2682 /* 74*/ 0, 2683 MatFDColoringApply_BAIJ, 2684 0, 2685 0, 2686 0, 2687 /* 79*/ 0, 2688 0, 2689 0, 2690 0, 2691 MatLoad_SeqBAIJ, 2692 /* 84*/ 0, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /* 89*/ 0, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /* 94*/ 0, 2703 0, 2704 0, 2705 0, 2706 0, 2707 /* 99*/ 0, 2708 0, 2709 0, 2710 0, 2711 0, 2712 /*104*/ 0, 2713 MatRealPart_SeqBAIJ, 2714 MatImaginaryPart_SeqBAIJ, 2715 0, 2716 0, 2717 /*109*/ 0, 2718 0, 2719 0, 2720 0, 2721 MatMissingDiagonal_SeqBAIJ, 2722 /*114*/ 0, 2723 0, 2724 0, 2725 0, 2726 0, 2727 /*119*/ 0, 2728 0, 2729 MatMultHermitianTranspose_SeqBAIJ, 2730 MatMultHermitianTransposeAdd_SeqBAIJ, 2731 0, 2732 /*124*/ 0, 2733 0, 2734 MatInvertBlockDiagonal_SeqBAIJ, 2735 0, 2736 0, 2737 /*129*/ 0, 2738 0, 2739 0, 2740 0, 2741 0, 2742 /*134*/ 0, 2743 0, 2744 0, 2745 0, 2746 0, 2747 /*139*/ 0, 2748 0, 2749 0, 2750 MatFDColoringSetUp_SeqXAIJ, 2751 0, 2752 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2753 }; 2754 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2757 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2758 { 2759 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2760 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2761 PetscErrorCode ierr; 2762 2763 PetscFunctionBegin; 2764 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2765 2766 /* allocate space for values if not already there */ 2767 if (!aij->saved_values) { 2768 ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr); 2769 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2770 } 2771 2772 /* copy values over */ 2773 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2774 PetscFunctionReturn(0); 2775 } 2776 2777 #undef __FUNCT__ 2778 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2779 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2780 { 2781 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2782 PetscErrorCode ierr; 2783 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2784 2785 PetscFunctionBegin; 2786 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2787 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2788 2789 /* copy values over */ 2790 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2791 PetscFunctionReturn(0); 2792 } 2793 2794 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2795 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2796 2797 #undef __FUNCT__ 2798 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2799 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2800 { 2801 Mat_SeqBAIJ *b; 2802 PetscErrorCode ierr; 2803 PetscInt i,mbs,nbs,bs2; 2804 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2805 2806 PetscFunctionBegin; 2807 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2808 if (nz == MAT_SKIP_ALLOCATION) { 2809 skipallocation = PETSC_TRUE; 2810 nz = 0; 2811 } 2812 2813 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2814 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2815 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2816 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2817 2818 B->preallocated = PETSC_TRUE; 2819 2820 mbs = B->rmap->n/bs; 2821 nbs = B->cmap->n/bs; 2822 bs2 = bs*bs; 2823 2824 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2825 2826 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2827 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2828 if (nnz) { 2829 for (i=0; i<mbs; i++) { 2830 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2831 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2832 } 2833 } 2834 2835 b = (Mat_SeqBAIJ*)B->data; 2836 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2837 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2838 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2839 2840 if (!flg) { 2841 switch (bs) { 2842 case 1: 2843 B->ops->mult = MatMult_SeqBAIJ_1; 2844 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2845 break; 2846 case 2: 2847 B->ops->mult = MatMult_SeqBAIJ_2; 2848 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2849 break; 2850 case 3: 2851 B->ops->mult = MatMult_SeqBAIJ_3; 2852 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2853 break; 2854 case 4: 2855 B->ops->mult = MatMult_SeqBAIJ_4; 2856 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2857 break; 2858 case 5: 2859 B->ops->mult = MatMult_SeqBAIJ_5; 2860 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2861 break; 2862 case 6: 2863 B->ops->mult = MatMult_SeqBAIJ_6; 2864 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2865 break; 2866 case 7: 2867 B->ops->mult = MatMult_SeqBAIJ_7; 2868 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2869 break; 2870 case 15: 2871 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2872 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2873 break; 2874 default: 2875 B->ops->mult = MatMult_SeqBAIJ_N; 2876 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2877 break; 2878 } 2879 } 2880 B->ops->sor = MatSOR_SeqBAIJ; 2881 b->mbs = mbs; 2882 b->nbs = nbs; 2883 if (!skipallocation) { 2884 if (!b->imax) { 2885 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2886 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2887 2888 b->free_imax_ilen = PETSC_TRUE; 2889 } 2890 /* b->ilen will count nonzeros in each block row so far. */ 2891 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2892 if (!nnz) { 2893 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2894 else if (nz < 0) nz = 1; 2895 for (i=0; i<mbs; i++) b->imax[i] = nz; 2896 nz = nz*mbs; 2897 } else { 2898 nz = 0; 2899 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2900 } 2901 2902 /* allocate the matrix space */ 2903 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2904 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2905 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2906 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2907 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2908 2909 b->singlemalloc = PETSC_TRUE; 2910 b->i[0] = 0; 2911 for (i=1; i<mbs+1; i++) { 2912 b->i[i] = b->i[i-1] + b->imax[i-1]; 2913 } 2914 b->free_a = PETSC_TRUE; 2915 b->free_ij = PETSC_TRUE; 2916 #if defined(PETSC_THREADCOMM_ACTIVE) 2917 ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr); 2918 #endif 2919 } else { 2920 b->free_a = PETSC_FALSE; 2921 b->free_ij = PETSC_FALSE; 2922 } 2923 2924 b->bs2 = bs2; 2925 b->mbs = mbs; 2926 b->nz = 0; 2927 b->maxnz = nz; 2928 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2929 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2930 PetscFunctionReturn(0); 2931 } 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2935 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2936 { 2937 PetscInt i,m,nz,nz_max=0,*nnz; 2938 PetscScalar *values=0; 2939 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2940 PetscErrorCode ierr; 2941 2942 PetscFunctionBegin; 2943 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2944 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2945 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2946 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2947 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2948 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2949 m = B->rmap->n/bs; 2950 2951 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2952 ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr); 2953 for (i=0; i<m; i++) { 2954 nz = ii[i+1]- ii[i]; 2955 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2956 nz_max = PetscMax(nz_max, nz); 2957 nnz[i] = nz; 2958 } 2959 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2960 ierr = PetscFree(nnz);CHKERRQ(ierr); 2961 2962 values = (PetscScalar*)V; 2963 if (!values) { 2964 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2965 } 2966 for (i=0; i<m; i++) { 2967 PetscInt ncols = ii[i+1] - ii[i]; 2968 const PetscInt *icols = jj + ii[i]; 2969 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2970 if (!roworiented) { 2971 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2972 } else { 2973 PetscInt j; 2974 for (j=0; j<ncols; j++) { 2975 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2976 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2977 } 2978 } 2979 } 2980 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2981 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2984 PetscFunctionReturn(0); 2985 } 2986 2987 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2988 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 2989 #if defined(PETSC_HAVE_MUMPS) 2990 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2991 #endif 2992 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 2993 2994 /*MC 2995 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2996 block sparse compressed row format. 2997 2998 Options Database Keys: 2999 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3000 3001 Level: beginner 3002 3003 .seealso: MatCreateSeqBAIJ() 3004 M*/ 3005 3006 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "MatCreate_SeqBAIJ" 3010 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3011 { 3012 PetscErrorCode ierr; 3013 PetscMPIInt size; 3014 Mat_SeqBAIJ *b; 3015 3016 PetscFunctionBegin; 3017 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3018 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3019 3020 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3021 B->data = (void*)b; 3022 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3023 3024 b->row = 0; 3025 b->col = 0; 3026 b->icol = 0; 3027 b->reallocs = 0; 3028 b->saved_values = 0; 3029 3030 b->roworiented = PETSC_TRUE; 3031 b->nonew = 0; 3032 b->diag = 0; 3033 b->solve_work = 0; 3034 b->mult_work = 0; 3035 B->spptr = 0; 3036 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3037 b->keepnonzeropattern = PETSC_FALSE; 3038 b->xtoy = 0; 3039 b->XtoY = 0; 3040 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3044 #if defined(PETSC_HAVE_MUMPS) 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3046 #endif 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3057 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3058 PetscFunctionReturn(0); 3059 } 3060 3061 #undef __FUNCT__ 3062 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3063 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3064 { 3065 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3066 PetscErrorCode ierr; 3067 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3068 3069 PetscFunctionBegin; 3070 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3071 3072 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3073 c->imax = a->imax; 3074 c->ilen = a->ilen; 3075 c->free_imax_ilen = PETSC_FALSE; 3076 } else { 3077 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3078 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3079 for (i=0; i<mbs; i++) { 3080 c->imax[i] = a->imax[i]; 3081 c->ilen[i] = a->ilen[i]; 3082 } 3083 c->free_imax_ilen = PETSC_TRUE; 3084 } 3085 3086 /* allocate the matrix space */ 3087 if (mallocmatspace) { 3088 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3089 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3090 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3091 3092 c->i = a->i; 3093 c->j = a->j; 3094 c->singlemalloc = PETSC_FALSE; 3095 c->free_a = PETSC_TRUE; 3096 c->free_ij = PETSC_FALSE; 3097 c->parent = A; 3098 C->preallocated = PETSC_TRUE; 3099 C->assembled = PETSC_TRUE; 3100 3101 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3102 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3103 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3106 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3107 3108 c->singlemalloc = PETSC_TRUE; 3109 c->free_a = PETSC_TRUE; 3110 c->free_ij = PETSC_TRUE; 3111 3112 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3113 if (mbs > 0) { 3114 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3115 if (cpvalues == MAT_COPY_VALUES) { 3116 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3117 } else { 3118 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3119 } 3120 } 3121 C->preallocated = PETSC_TRUE; 3122 C->assembled = PETSC_TRUE; 3123 } 3124 } 3125 3126 c->roworiented = a->roworiented; 3127 c->nonew = a->nonew; 3128 3129 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3130 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3131 3132 c->bs2 = a->bs2; 3133 c->mbs = a->mbs; 3134 c->nbs = a->nbs; 3135 3136 if (a->diag) { 3137 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3138 c->diag = a->diag; 3139 c->free_diag = PETSC_FALSE; 3140 } else { 3141 ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr); 3142 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3143 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3144 c->free_diag = PETSC_TRUE; 3145 } 3146 } else c->diag = 0; 3147 3148 c->nz = a->nz; 3149 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3150 c->solve_work = 0; 3151 c->mult_work = 0; 3152 3153 c->compressedrow.use = a->compressedrow.use; 3154 c->compressedrow.nrows = a->compressedrow.nrows; 3155 if (a->compressedrow.use) { 3156 i = a->compressedrow.nrows; 3157 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3158 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3159 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3160 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3161 } else { 3162 c->compressedrow.use = PETSC_FALSE; 3163 c->compressedrow.i = NULL; 3164 c->compressedrow.rindex = NULL; 3165 } 3166 C->nonzerostate = A->nonzerostate; 3167 3168 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3169 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3170 PetscFunctionReturn(0); 3171 } 3172 3173 #undef __FUNCT__ 3174 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3175 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3176 { 3177 PetscErrorCode ierr; 3178 3179 PetscFunctionBegin; 3180 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3181 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3182 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3183 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3184 PetscFunctionReturn(0); 3185 } 3186 3187 #undef __FUNCT__ 3188 #define __FUNCT__ "MatLoad_SeqBAIJ" 3189 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3190 { 3191 Mat_SeqBAIJ *a; 3192 PetscErrorCode ierr; 3193 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3194 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3195 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3196 PetscInt *masked,nmask,tmp,bs2,ishift; 3197 PetscMPIInt size; 3198 int fd; 3199 PetscScalar *aa; 3200 MPI_Comm comm; 3201 3202 PetscFunctionBegin; 3203 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3204 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3205 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3206 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3207 bs2 = bs*bs; 3208 3209 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3210 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3211 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3212 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3213 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3214 M = header[1]; N = header[2]; nz = header[3]; 3215 3216 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3217 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3218 3219 /* 3220 This code adds extra rows to make sure the number of rows is 3221 divisible by the blocksize 3222 */ 3223 mbs = M/bs; 3224 extra_rows = bs - M + bs*(mbs); 3225 if (extra_rows == bs) extra_rows = 0; 3226 else mbs++; 3227 if (extra_rows) { 3228 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3229 } 3230 3231 /* Set global sizes if not already set */ 3232 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3233 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3234 } else { /* Check if the matrix global sizes are correct */ 3235 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3236 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3237 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3238 } 3239 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3240 } 3241 3242 /* read in row lengths */ 3243 ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr); 3244 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3245 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3246 3247 /* read in column indices */ 3248 ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr); 3249 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3250 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3251 3252 /* loop over row lengths determining block row lengths */ 3253 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3254 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3255 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3256 rowcount = 0; 3257 nzcount = 0; 3258 for (i=0; i<mbs; i++) { 3259 nmask = 0; 3260 for (j=0; j<bs; j++) { 3261 kmax = rowlengths[rowcount]; 3262 for (k=0; k<kmax; k++) { 3263 tmp = jj[nzcount++]/bs; 3264 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3265 } 3266 rowcount++; 3267 } 3268 browlengths[i] += nmask; 3269 /* zero out the mask elements we set */ 3270 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3271 } 3272 3273 /* Do preallocation */ 3274 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3275 a = (Mat_SeqBAIJ*)newmat->data; 3276 3277 /* set matrix "i" values */ 3278 a->i[0] = 0; 3279 for (i=1; i<= mbs; i++) { 3280 a->i[i] = a->i[i-1] + browlengths[i-1]; 3281 a->ilen[i-1] = browlengths[i-1]; 3282 } 3283 a->nz = 0; 3284 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3285 3286 /* read in nonzero values */ 3287 ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr); 3288 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3289 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3290 3291 /* set "a" and "j" values into matrix */ 3292 nzcount = 0; jcount = 0; 3293 for (i=0; i<mbs; i++) { 3294 nzcountb = nzcount; 3295 nmask = 0; 3296 for (j=0; j<bs; j++) { 3297 kmax = rowlengths[i*bs+j]; 3298 for (k=0; k<kmax; k++) { 3299 tmp = jj[nzcount++]/bs; 3300 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3301 } 3302 } 3303 /* sort the masked values */ 3304 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3305 3306 /* set "j" values into matrix */ 3307 maskcount = 1; 3308 for (j=0; j<nmask; j++) { 3309 a->j[jcount++] = masked[j]; 3310 mask[masked[j]] = maskcount++; 3311 } 3312 /* set "a" values into matrix */ 3313 ishift = bs2*a->i[i]; 3314 for (j=0; j<bs; j++) { 3315 kmax = rowlengths[i*bs+j]; 3316 for (k=0; k<kmax; k++) { 3317 tmp = jj[nzcountb]/bs; 3318 block = mask[tmp] - 1; 3319 point = jj[nzcountb] - bs*tmp; 3320 idx = ishift + bs2*block + j + bs*point; 3321 a->a[idx] = (MatScalar)aa[nzcountb++]; 3322 } 3323 } 3324 /* zero out the mask elements we set */ 3325 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3326 } 3327 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3328 3329 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3330 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3331 ierr = PetscFree(aa);CHKERRQ(ierr); 3332 ierr = PetscFree(jj);CHKERRQ(ierr); 3333 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3334 3335 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3336 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3337 PetscFunctionReturn(0); 3338 } 3339 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatCreateSeqBAIJ" 3342 /*@C 3343 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3344 compressed row) format. For good matrix assembly performance the 3345 user should preallocate the matrix storage by setting the parameter nz 3346 (or the array nnz). By setting these parameters accurately, performance 3347 during matrix assembly can be increased by more than a factor of 50. 3348 3349 Collective on MPI_Comm 3350 3351 Input Parameters: 3352 + comm - MPI communicator, set to PETSC_COMM_SELF 3353 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3354 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3355 . m - number of rows 3356 . n - number of columns 3357 . nz - number of nonzero blocks per block row (same for all rows) 3358 - nnz - array containing the number of nonzero blocks in the various block rows 3359 (possibly different for each block row) or NULL 3360 3361 Output Parameter: 3362 . A - the matrix 3363 3364 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3365 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3366 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3367 3368 Options Database Keys: 3369 . -mat_no_unroll - uses code that does not unroll the loops in the 3370 block calculations (much slower) 3371 . -mat_block_size - size of the blocks to use 3372 3373 Level: intermediate 3374 3375 Notes: 3376 The number of rows and columns must be divisible by blocksize. 3377 3378 If the nnz parameter is given then the nz parameter is ignored 3379 3380 A nonzero block is any block that as 1 or more nonzeros in it 3381 3382 The block AIJ format is fully compatible with standard Fortran 77 3383 storage. That is, the stored row and column indices can begin at 3384 either one (as in Fortran) or zero. See the users' manual for details. 3385 3386 Specify the preallocated storage with either nz or nnz (not both). 3387 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3388 allocation. See Users-Manual: ch_mat for details. 3389 matrices. 3390 3391 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3392 @*/ 3393 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3394 { 3395 PetscErrorCode ierr; 3396 3397 PetscFunctionBegin; 3398 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3399 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3400 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3401 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3402 PetscFunctionReturn(0); 3403 } 3404 3405 #undef __FUNCT__ 3406 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3407 /*@C 3408 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3409 per row in the matrix. For good matrix assembly performance the 3410 user should preallocate the matrix storage by setting the parameter nz 3411 (or the array nnz). By setting these parameters accurately, performance 3412 during matrix assembly can be increased by more than a factor of 50. 3413 3414 Collective on MPI_Comm 3415 3416 Input Parameters: 3417 + B - the matrix 3418 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3419 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3420 . nz - number of block nonzeros per block row (same for all rows) 3421 - nnz - array containing the number of block nonzeros in the various block rows 3422 (possibly different for each block row) or NULL 3423 3424 Options Database Keys: 3425 . -mat_no_unroll - uses code that does not unroll the loops in the 3426 block calculations (much slower) 3427 . -mat_block_size - size of the blocks to use 3428 3429 Level: intermediate 3430 3431 Notes: 3432 If the nnz parameter is given then the nz parameter is ignored 3433 3434 You can call MatGetInfo() to get information on how effective the preallocation was; 3435 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3436 You can also run with the option -info and look for messages with the string 3437 malloc in them to see if additional memory allocation was needed. 3438 3439 The block AIJ format is fully compatible with standard Fortran 77 3440 storage. That is, the stored row and column indices can begin at 3441 either one (as in Fortran) or zero. See the users' manual for details. 3442 3443 Specify the preallocated storage with either nz or nnz (not both). 3444 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3445 allocation. See Users-Manual: ch_mat for details. 3446 3447 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3448 @*/ 3449 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3450 { 3451 PetscErrorCode ierr; 3452 3453 PetscFunctionBegin; 3454 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3455 PetscValidType(B,1); 3456 PetscValidLogicalCollectiveInt(B,bs,2); 3457 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3458 PetscFunctionReturn(0); 3459 } 3460 3461 #undef __FUNCT__ 3462 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3463 /*@C 3464 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3465 (the default sequential PETSc format). 3466 3467 Collective on MPI_Comm 3468 3469 Input Parameters: 3470 + B - the matrix 3471 . i - the indices into j for the start of each local row (starts with zero) 3472 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3473 - v - optional values in the matrix 3474 3475 Level: developer 3476 3477 Notes: 3478 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3479 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3480 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3481 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3482 block column and the second index is over columns within a block. 3483 3484 .keywords: matrix, aij, compressed row, sparse 3485 3486 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3487 @*/ 3488 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3489 { 3490 PetscErrorCode ierr; 3491 3492 PetscFunctionBegin; 3493 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3494 PetscValidType(B,1); 3495 PetscValidLogicalCollectiveInt(B,bs,2); 3496 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3497 PetscFunctionReturn(0); 3498 } 3499 3500 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3503 /*@ 3504 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3505 3506 Collective on MPI_Comm 3507 3508 Input Parameters: 3509 + comm - must be an MPI communicator of size 1 3510 . bs - size of block 3511 . m - number of rows 3512 . n - number of columns 3513 . i - row indices 3514 . j - column indices 3515 - a - matrix values 3516 3517 Output Parameter: 3518 . mat - the matrix 3519 3520 Level: advanced 3521 3522 Notes: 3523 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3524 once the matrix is destroyed 3525 3526 You cannot set new nonzero locations into this matrix, that will generate an error. 3527 3528 The i and j indices are 0 based 3529 3530 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3531 3532 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3533 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3534 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3535 with column-major ordering within blocks. 3536 3537 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3538 3539 @*/ 3540 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3541 { 3542 PetscErrorCode ierr; 3543 PetscInt ii; 3544 Mat_SeqBAIJ *baij; 3545 3546 PetscFunctionBegin; 3547 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3548 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3549 3550 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3551 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3552 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3553 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3554 baij = (Mat_SeqBAIJ*)(*mat)->data; 3555 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3556 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3557 3558 baij->i = i; 3559 baij->j = j; 3560 baij->a = a; 3561 3562 baij->singlemalloc = PETSC_FALSE; 3563 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3564 baij->free_a = PETSC_FALSE; 3565 baij->free_ij = PETSC_FALSE; 3566 3567 for (ii=0; ii<m; ii++) { 3568 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3569 #if defined(PETSC_USE_DEBUG) 3570 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3571 #endif 3572 } 3573 #if defined(PETSC_USE_DEBUG) 3574 for (ii=0; ii<baij->i[m]; ii++) { 3575 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3576 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3577 } 3578 #endif 3579 3580 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3581 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3582 PetscFunctionReturn(0); 3583 } 3584 3585 #undef __FUNCT__ 3586 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3587 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3588 { 3589 PetscErrorCode ierr; 3590 3591 PetscFunctionBegin; 3592 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3593 PetscFunctionReturn(0); 3594 } 3595