1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr); 156 } 157 work = a->mult_work; 158 t = work + k+1; 159 if (!a->sor_work) { 160 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 161 } 162 w = a->sor_work; 163 164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 165 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 166 167 if (flag & SOR_ZERO_INITIAL_GUESS) { 168 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 169 switch (bs) { 170 case 1: 171 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 172 t[0] = b[0]; 173 i2 = 1; 174 idiag += 1; 175 for (i=1; i<m; i++) { 176 v = aa + ai[i]; 177 vi = aj + ai[i]; 178 nz = diag[i] - ai[i]; 179 s[0] = b[i2]; 180 for (j=0; j<nz; j++) { 181 xw[0] = x[vi[j]]; 182 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 183 } 184 t[i2] = s[0]; 185 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 186 x[i2] = xw[0]; 187 idiag += 1; 188 i2 += 1; 189 } 190 break; 191 case 2: 192 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 193 t[0] = b[0]; t[1] = b[1]; 194 i2 = 2; 195 idiag += 4; 196 for (i=1; i<m; i++) { 197 v = aa + 4*ai[i]; 198 vi = aj + ai[i]; 199 nz = diag[i] - ai[i]; 200 s[0] = b[i2]; s[1] = b[i2+1]; 201 for (j=0; j<nz; j++) { 202 idx = 2*vi[j]; 203 it = 4*j; 204 xw[0] = x[idx]; xw[1] = x[1+idx]; 205 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 206 } 207 t[i2] = s[0]; t[i2+1] = s[1]; 208 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 209 x[i2] = xw[0]; x[i2+1] = xw[1]; 210 idiag += 4; 211 i2 += 2; 212 } 213 break; 214 case 3: 215 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 216 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 217 i2 = 3; 218 idiag += 9; 219 for (i=1; i<m; i++) { 220 v = aa + 9*ai[i]; 221 vi = aj + ai[i]; 222 nz = diag[i] - ai[i]; 223 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 224 while (nz--) { 225 idx = 3*(*vi++); 226 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 228 v += 9; 229 } 230 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 231 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 232 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 233 idiag += 9; 234 i2 += 3; 235 } 236 break; 237 case 4: 238 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 239 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 240 i2 = 4; 241 idiag += 16; 242 for (i=1; i<m; i++) { 243 v = aa + 16*ai[i]; 244 vi = aj + ai[i]; 245 nz = diag[i] - ai[i]; 246 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 247 while (nz--) { 248 idx = 4*(*vi++); 249 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 250 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 251 v += 16; 252 } 253 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 254 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 255 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 256 idiag += 16; 257 i2 += 4; 258 } 259 break; 260 case 5: 261 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 262 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 263 i2 = 5; 264 idiag += 25; 265 for (i=1; i<m; i++) { 266 v = aa + 25*ai[i]; 267 vi = aj + ai[i]; 268 nz = diag[i] - ai[i]; 269 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 270 while (nz--) { 271 idx = 5*(*vi++); 272 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 273 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 274 v += 25; 275 } 276 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 277 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 278 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 279 idiag += 25; 280 i2 += 5; 281 } 282 break; 283 case 6: 284 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 285 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 286 i2 = 6; 287 idiag += 36; 288 for (i=1; i<m; i++) { 289 v = aa + 36*ai[i]; 290 vi = aj + ai[i]; 291 nz = diag[i] - ai[i]; 292 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 293 while (nz--) { 294 idx = 6*(*vi++); 295 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 296 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 298 v += 36; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 301 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 302 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 303 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 304 idiag += 36; 305 i2 += 6; 306 } 307 break; 308 case 7: 309 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 310 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 311 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 312 i2 = 7; 313 idiag += 49; 314 for (i=1; i<m; i++) { 315 v = aa + 49*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 319 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 320 while (nz--) { 321 idx = 7*(*vi++); 322 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 323 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 324 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 325 v += 49; 326 } 327 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 328 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 329 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 331 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 332 idiag += 49; 333 i2 += 7; 334 } 335 break; 336 default: 337 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 338 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 339 i2 = bs; 340 idiag += bs2; 341 for (i=1; i<m; i++) { 342 v = aa + bs2*ai[i]; 343 vi = aj + ai[i]; 344 nz = diag[i] - ai[i]; 345 346 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 347 /* copy all rows of x that are needed into contiguous space */ 348 workt = work; 349 for (j=0; j<nz; j++) { 350 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 351 workt += bs; 352 } 353 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 354 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 356 357 idiag += bs2; 358 i2 += bs; 359 } 360 break; 361 } 362 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 363 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 364 xb = t; 365 } 366 else xb = b; 367 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 368 idiag = a->idiag+bs2*(a->mbs-1); 369 i2 = bs * (m-1); 370 switch (bs) { 371 case 1: 372 s[0] = xb[i2]; 373 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 374 x[i2] = xw[0]; 375 i2 -= 1; 376 for (i=m-2; i>=0; i--) { 377 v = aa + (diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s[0] = xb[i2]; 381 for (j=0; j<nz; j++) { 382 xw[0] = x[vi[j]]; 383 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 384 } 385 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 386 x[i2] = xw[0]; 387 idiag -= 1; 388 i2 -= 1; 389 } 390 break; 391 case 2: 392 s[0] = xb[i2]; s[1] = xb[i2+1]; 393 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 394 x[i2] = xw[0]; x[i2+1] = xw[1]; 395 i2 -= 2; 396 idiag -= 4; 397 for (i=m-2; i>=0; i--) { 398 v = aa + 4*(diag[i] + 1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 for (j=0; j<nz; j++) { 403 idx = 2*vi[j]; 404 it = 4*j; 405 xw[0] = x[idx]; xw[1] = x[1+idx]; 406 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 407 } 408 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 409 x[i2] = xw[0]; x[i2+1] = xw[1]; 410 idiag -= 4; 411 i2 -= 2; 412 } 413 break; 414 case 3: 415 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 416 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 417 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 418 i2 -= 3; 419 idiag -= 9; 420 for (i=m-2; i>=0; i--) { 421 v = aa + 9*(diag[i]+1); 422 vi = aj + diag[i] + 1; 423 nz = ai[i+1] - diag[i] - 1; 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 while (nz--) { 426 idx = 3*(*vi++); 427 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 429 v += 9; 430 } 431 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 432 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 433 idiag -= 9; 434 i2 -= 3; 435 } 436 break; 437 case 4: 438 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 439 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 440 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 441 i2 -= 4; 442 idiag -= 16; 443 for (i=m-2; i>=0; i--) { 444 v = aa + 16*(diag[i]+1); 445 vi = aj + diag[i] + 1; 446 nz = ai[i+1] - diag[i] - 1; 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 while (nz--) { 449 idx = 4*(*vi++); 450 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 451 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 452 v += 16; 453 } 454 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 455 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 456 idiag -= 16; 457 i2 -= 4; 458 } 459 break; 460 case 5: 461 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 462 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 463 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 464 i2 -= 5; 465 idiag -= 25; 466 for (i=m-2; i>=0; i--) { 467 v = aa + 25*(diag[i]+1); 468 vi = aj + diag[i] + 1; 469 nz = ai[i+1] - diag[i] - 1; 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 while (nz--) { 472 idx = 5*(*vi++); 473 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 474 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 475 v += 25; 476 } 477 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 478 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 479 idiag -= 25; 480 i2 -= 5; 481 } 482 break; 483 case 6: 484 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 485 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 486 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 487 i2 -= 6; 488 idiag -= 36; 489 for (i=m-2; i>=0; i--) { 490 v = aa + 36*(diag[i]+1); 491 vi = aj + diag[i] + 1; 492 nz = ai[i+1] - diag[i] - 1; 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 while (nz--) { 495 idx = 6*(*vi++); 496 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 497 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 499 v += 36; 500 } 501 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 503 idiag -= 36; 504 i2 -= 6; 505 } 506 break; 507 case 7: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 509 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 510 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 512 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 513 i2 -= 7; 514 idiag -= 49; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 49*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 520 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 521 while (nz--) { 522 idx = 7*(*vi++); 523 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 524 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 525 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 526 v += 49; 527 } 528 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 529 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 530 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 531 idiag -= 49; 532 i2 -= 7; 533 } 534 break; 535 default: 536 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 537 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 538 i2 -= bs; 539 idiag -= bs2; 540 for (i=m-2; i>=0; i--) { 541 v = aa + bs2*(diag[i]+1); 542 vi = aj + diag[i] + 1; 543 nz = ai[i+1] - diag[i] - 1; 544 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 /* copy all rows of x that are needed into contiguous space */ 547 workt = work; 548 for (j=0; j<nz; j++) { 549 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 workt += bs; 551 } 552 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 553 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 554 555 idiag -= bs2; 556 i2 -= bs; 557 } 558 break; 559 } 560 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 561 } 562 its--; 563 } 564 while (its--) { 565 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 566 idiag = a->idiag; 567 i2 = 0; 568 switch (bs) { 569 case 1: 570 for (i=0; i<m; i++) { 571 v = aa + ai[i]; 572 vi = aj + ai[i]; 573 nz = ai[i+1] - ai[i]; 574 s[0] = b[i2]; 575 for (j=0; j<nz; j++) { 576 xw[0] = x[vi[j]]; 577 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 578 } 579 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 580 x[i2] += xw[0]; 581 idiag += 1; 582 i2 += 1; 583 } 584 break; 585 case 2: 586 for (i=0; i<m; i++) { 587 v = aa + 4*ai[i]; 588 vi = aj + ai[i]; 589 nz = ai[i+1] - ai[i]; 590 s[0] = b[i2]; s[1] = b[i2+1]; 591 for (j=0; j<nz; j++) { 592 idx = 2*vi[j]; 593 it = 4*j; 594 xw[0] = x[idx]; xw[1] = x[1+idx]; 595 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 596 } 597 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 598 x[i2] += xw[0]; x[i2+1] += xw[1]; 599 idiag += 4; 600 i2 += 2; 601 } 602 break; 603 case 3: 604 for (i=0; i<m; i++) { 605 v = aa + 9*ai[i]; 606 vi = aj + ai[i]; 607 nz = ai[i+1] - ai[i]; 608 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 609 while (nz--) { 610 idx = 3*(*vi++); 611 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 612 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 613 v += 9; 614 } 615 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 616 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 617 idiag += 9; 618 i2 += 3; 619 } 620 break; 621 case 4: 622 for (i=0; i<m; i++) { 623 v = aa + 16*ai[i]; 624 vi = aj + ai[i]; 625 nz = ai[i+1] - ai[i]; 626 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 627 while (nz--) { 628 idx = 4*(*vi++); 629 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 630 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 631 v += 16; 632 } 633 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 634 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 635 idiag += 16; 636 i2 += 4; 637 } 638 break; 639 case 5: 640 for (i=0; i<m; i++) { 641 v = aa + 25*ai[i]; 642 vi = aj + ai[i]; 643 nz = ai[i+1] - ai[i]; 644 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 645 while (nz--) { 646 idx = 5*(*vi++); 647 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 648 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 649 v += 25; 650 } 651 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 652 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 653 idiag += 25; 654 i2 += 5; 655 } 656 break; 657 case 6: 658 for (i=0; i<m; i++) { 659 v = aa + 36*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 663 while (nz--) { 664 idx = 6*(*vi++); 665 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 666 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 668 v += 36; 669 } 670 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 673 idiag += 36; 674 i2 += 6; 675 } 676 break; 677 case 7: 678 for (i=0; i<m; i++) { 679 v = aa + 49*ai[i]; 680 vi = aj + ai[i]; 681 nz = ai[i+1] - ai[i]; 682 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 683 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 684 while (nz--) { 685 idx = 7*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 689 v += 49; 690 } 691 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 692 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 693 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 694 idiag += 49; 695 i2 += 7; 696 } 697 break; 698 default: 699 for (i=0; i<m; i++) { 700 v = aa + bs2*ai[i]; 701 vi = aj + ai[i]; 702 nz = ai[i+1] - ai[i]; 703 704 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 705 /* copy all rows of x that are needed into contiguous space */ 706 workt = work; 707 for (j=0; j<nz; j++) { 708 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 709 workt += bs; 710 } 711 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 712 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 713 714 idiag += bs2; 715 i2 += bs; 716 } 717 break; 718 } 719 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 720 } 721 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 722 idiag = a->idiag+bs2*(a->mbs-1); 723 i2 = bs * (m-1); 724 switch (bs) { 725 case 1: 726 for (i=m-1; i>=0; i--) { 727 v = aa + ai[i]; 728 vi = aj + ai[i]; 729 nz = ai[i+1] - ai[i]; 730 s[0] = b[i2]; 731 for (j=0; j<nz; j++) { 732 xw[0] = x[vi[j]]; 733 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 734 } 735 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 736 x[i2] += xw[0]; 737 idiag -= 1; 738 i2 -= 1; 739 } 740 break; 741 case 2: 742 for (i=m-1; i>=0; i--) { 743 v = aa + 4*ai[i]; 744 vi = aj + ai[i]; 745 nz = ai[i+1] - ai[i]; 746 s[0] = b[i2]; s[1] = b[i2+1]; 747 for (j=0; j<nz; j++) { 748 idx = 2*vi[j]; 749 it = 4*j; 750 xw[0] = x[idx]; xw[1] = x[1+idx]; 751 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 752 } 753 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 754 x[i2] += xw[0]; x[i2+1] += xw[1]; 755 idiag -= 4; 756 i2 -= 2; 757 } 758 break; 759 case 3: 760 for (i=m-1; i>=0; i--) { 761 v = aa + 9*ai[i]; 762 vi = aj + ai[i]; 763 nz = ai[i+1] - ai[i]; 764 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 765 while (nz--) { 766 idx = 3*(*vi++); 767 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 768 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 769 v += 9; 770 } 771 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 772 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 773 idiag -= 9; 774 i2 -= 3; 775 } 776 break; 777 case 4: 778 for (i=m-1; i>=0; i--) { 779 v = aa + 16*ai[i]; 780 vi = aj + ai[i]; 781 nz = ai[i+1] - ai[i]; 782 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 783 while (nz--) { 784 idx = 4*(*vi++); 785 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 786 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 787 v += 16; 788 } 789 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 790 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 791 idiag -= 16; 792 i2 -= 4; 793 } 794 break; 795 case 5: 796 for (i=m-1; i>=0; i--) { 797 v = aa + 25*ai[i]; 798 vi = aj + ai[i]; 799 nz = ai[i+1] - ai[i]; 800 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 801 while (nz--) { 802 idx = 5*(*vi++); 803 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 804 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 805 v += 25; 806 } 807 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 808 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 809 idiag -= 25; 810 i2 -= 5; 811 } 812 break; 813 case 6: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 36*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 819 while (nz--) { 820 idx = 6*(*vi++); 821 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 822 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 824 v += 36; 825 } 826 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 829 idiag -= 36; 830 i2 -= 6; 831 } 832 break; 833 case 7: 834 for (i=m-1; i>=0; i--) { 835 v = aa + 49*ai[i]; 836 vi = aj + ai[i]; 837 nz = ai[i+1] - ai[i]; 838 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 839 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 844 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 845 v += 49; 846 } 847 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 848 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 849 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 850 idiag -= 49; 851 i2 -= 7; 852 } 853 break; 854 default: 855 for (i=m-1; i>=0; i--) { 856 v = aa + bs2*ai[i]; 857 vi = aj + ai[i]; 858 nz = ai[i+1] - ai[i]; 859 860 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 861 /* copy all rows of x that are needed into contiguous space */ 862 workt = work; 863 for (j=0; j<nz; j++) { 864 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 workt += bs; 866 } 867 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 868 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 869 870 idiag -= bs2; 871 i2 -= bs; 872 } 873 break; 874 } 875 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 876 } 877 } 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 884 /* 885 Special version for direct calls from Fortran (Used in PETSc-fun3d) 886 */ 887 #if defined(PETSC_HAVE_FORTRAN_CAPS) 888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 890 #define matsetvaluesblocked4_ matsetvaluesblocked4 891 #endif 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "matsetvaluesblocked4_" 895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 896 { 897 Mat A = *AA; 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 899 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 900 PetscInt *ai =a->i,*ailen=a->ilen; 901 PetscInt *aj =a->j,stepval,lastcol = -1; 902 const PetscScalar *value = v; 903 MatScalar *ap,*aa = a->a,*bap; 904 905 PetscFunctionBegin; 906 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 907 stepval = (n-1)*4; 908 for (k=0; k<m; k++) { /* loop over added rows */ 909 row = im[k]; 910 rp = aj + ai[row]; 911 ap = aa + 16*ai[row]; 912 nrow = ailen[row]; 913 low = 0; 914 high = nrow; 915 for (l=0; l<n; l++) { /* loop over added columns */ 916 col = in[l]; 917 if (col <= lastcol) low = 0; 918 else high = nrow; 919 lastcol = col; 920 value = v + k*(stepval+4 + l)*4; 921 while (high-low > 7) { 922 t = (low+high)/2; 923 if (rp[t] > col) high = t; 924 else low = t; 925 } 926 for (i=low; i<high; i++) { 927 if (rp[i] > col) break; 928 if (rp[i] == col) { 929 bap = ap + 16*i; 930 for (ii=0; ii<4; ii++,value+=stepval) { 931 for (jj=ii; jj<16; jj+=4) { 932 bap[jj] += *value++; 933 } 934 } 935 goto noinsert2; 936 } 937 } 938 N = nrow++ - 1; 939 high++; /* added new column index thus must search to one higher than before */ 940 /* shift up all the later entries in this row */ 941 for (ii=N; ii>=i; ii--) { 942 rp[ii+1] = rp[ii]; 943 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 944 } 945 if (N >= i) { 946 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 947 } 948 rp[i] = col; 949 bap = ap + 16*i; 950 for (ii=0; ii<4; ii++,value+=stepval) { 951 for (jj=ii; jj<16; jj+=4) { 952 bap[jj] = *value++; 953 } 954 } 955 noinsert2:; 956 low = i; 957 } 958 ailen[row] = nrow; 959 } 960 PetscFunctionReturnVoid(); 961 } 962 963 #if defined(PETSC_HAVE_FORTRAN_CAPS) 964 #define matsetvalues4_ MATSETVALUES4 965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 966 #define matsetvalues4_ matsetvalues4 967 #endif 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSetValues4_" 971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 972 { 973 Mat A = *AA; 974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 975 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 976 PetscInt *ai=a->i,*ailen=a->ilen; 977 PetscInt *aj=a->j,brow,bcol; 978 PetscInt ridx,cidx,lastcol = -1; 979 MatScalar *ap,value,*aa=a->a,*bap; 980 981 PetscFunctionBegin; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; brow = row/4; 984 rp = aj + ai[brow]; 985 ap = aa + 16*ai[brow]; 986 nrow = ailen[brow]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; bcol = col/4; 991 ridx = row % 4; cidx = col % 4; 992 value = v[l + k*n]; 993 if (col <= lastcol) low = 0; 994 else high = nrow; 995 lastcol = col; 996 while (high-low > 7) { 997 t = (low+high)/2; 998 if (rp[t] > bcol) high = t; 999 else low = t; 1000 } 1001 for (i=low; i<high; i++) { 1002 if (rp[i] > bcol) break; 1003 if (rp[i] == bcol) { 1004 bap = ap + 16*i + 4*cidx + ridx; 1005 *bap += value; 1006 goto noinsert1; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N>=i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = bcol; 1020 ap[16*i + 4*cidx + ridx] = value; 1021 noinsert1:; 1022 low = i; 1023 } 1024 ailen[brow] = nrow; 1025 } 1026 PetscFunctionReturnVoid(); 1027 } 1028 1029 /* 1030 Checks for missing diagonals 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*ii = a->i,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !ii) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1047 } else { 1048 diag = a->diag; 1049 for (i=0; i<a->mbs; i++) { 1050 if (diag[i] >= ii[i+1]) { 1051 *missing = PETSC_TRUE; 1052 if (d) *d = i; 1053 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1054 break; 1055 } 1056 } 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = 0; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1176 { 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (!ia) PetscFunctionReturn(0); 1181 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1182 ierr = PetscFree(*ia);CHKERRQ(ierr); 1183 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1191 { 1192 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1198 #endif 1199 ierr = MatDestroy_Redundant(&A->redundant);CHKERRQ(ierr); 1200 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1201 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1202 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1203 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1204 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1205 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1206 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1207 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1208 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1209 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1210 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1211 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1212 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1213 1214 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1215 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1216 ierr = PetscFree(A->data);CHKERRQ(ierr); 1217 1218 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1235 { 1236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 switch (op) { 1241 case MAT_ROW_ORIENTED: 1242 a->roworiented = flg; 1243 break; 1244 case MAT_KEEP_NONZERO_PATTERN: 1245 a->keepnonzeropattern = flg; 1246 break; 1247 case MAT_NEW_NONZERO_LOCATIONS: 1248 a->nonew = (flg ? 0 : 1); 1249 break; 1250 case MAT_NEW_NONZERO_LOCATION_ERR: 1251 a->nonew = (flg ? -1 : 0); 1252 break; 1253 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1254 a->nonew = (flg ? -2 : 0); 1255 break; 1256 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1257 a->nounused = (flg ? -1 : 0); 1258 break; 1259 case MAT_NEW_DIAGONALS: 1260 case MAT_IGNORE_OFF_PROC_ENTRIES: 1261 case MAT_USE_HASH_TABLE: 1262 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1263 break; 1264 case MAT_SPD: 1265 case MAT_SYMMETRIC: 1266 case MAT_STRUCTURALLY_SYMMETRIC: 1267 case MAT_HERMITIAN: 1268 case MAT_SYMMETRY_ETERNAL: 1269 /* These options are handled directly by MatSetOption() */ 1270 break; 1271 default: 1272 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1273 } 1274 PetscFunctionReturn(0); 1275 } 1276 1277 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1280 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1281 { 1282 PetscErrorCode ierr; 1283 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1284 MatScalar *aa_i; 1285 PetscScalar *v_i; 1286 1287 PetscFunctionBegin; 1288 bs = A->rmap->bs; 1289 bs2 = bs*bs; 1290 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1291 1292 bn = row/bs; /* Block number */ 1293 bp = row % bs; /* Block Position */ 1294 M = ai[bn+1] - ai[bn]; 1295 *nz = bs*M; 1296 1297 if (v) { 1298 *v = 0; 1299 if (*nz) { 1300 ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr); 1301 for (i=0; i<M; i++) { /* for each block in the block row */ 1302 v_i = *v + i*bs; 1303 aa_i = aa + bs2*(ai[bn] + i); 1304 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1305 } 1306 } 1307 } 1308 1309 if (idx) { 1310 *idx = 0; 1311 if (*nz) { 1312 ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr); 1313 for (i=0; i<M; i++) { /* for each block in the block row */ 1314 idx_i = *idx + i*bs; 1315 itmp = bs*aj[ai[bn] + i]; 1316 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1317 } 1318 } 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1325 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1326 { 1327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1328 PetscErrorCode ierr; 1329 1330 PetscFunctionBegin; 1331 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1337 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1338 { 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1343 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1344 PetscFunctionReturn(0); 1345 } 1346 1347 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1348 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1351 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1352 { 1353 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1354 Mat C; 1355 PetscErrorCode ierr; 1356 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1357 PetscInt *rows,*cols,bs2=a->bs2; 1358 MatScalar *array; 1359 1360 PetscFunctionBegin; 1361 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1362 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1363 ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr); 1364 1365 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1366 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1367 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1368 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1369 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1370 ierr = PetscFree(col);CHKERRQ(ierr); 1371 } else { 1372 C = *B; 1373 } 1374 1375 array = a->a; 1376 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1377 for (i=0; i<mbs; i++) { 1378 cols[0] = i*bs; 1379 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1380 len = ai[i+1] - ai[i]; 1381 for (j=0; j<len; j++) { 1382 rows[0] = (*aj++)*bs; 1383 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1384 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1385 array += bs2; 1386 } 1387 } 1388 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1389 1390 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 1393 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1394 *B = C; 1395 } else { 1396 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1404 { 1405 PetscErrorCode ierr; 1406 Mat Btrans; 1407 1408 PetscFunctionBegin; 1409 *f = PETSC_FALSE; 1410 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1411 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1412 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1418 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1419 { 1420 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1421 PetscErrorCode ierr; 1422 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1423 int fd; 1424 PetscScalar *aa; 1425 FILE *file; 1426 1427 PetscFunctionBegin; 1428 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1429 ierr = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr); 1430 col_lens[0] = MAT_FILE_CLASSID; 1431 1432 col_lens[1] = A->rmap->N; 1433 col_lens[2] = A->cmap->n; 1434 col_lens[3] = a->nz*bs2; 1435 1436 /* store lengths of each row and write (including header) to file */ 1437 count = 0; 1438 for (i=0; i<a->mbs; i++) { 1439 for (j=0; j<bs; j++) { 1440 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1441 } 1442 } 1443 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1444 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1445 1446 /* store column indices (zero start index) */ 1447 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1448 count = 0; 1449 for (i=0; i<a->mbs; i++) { 1450 for (j=0; j<bs; j++) { 1451 for (k=a->i[i]; k<a->i[i+1]; k++) { 1452 for (l=0; l<bs; l++) { 1453 jj[count++] = bs*a->j[k] + l; 1454 } 1455 } 1456 } 1457 } 1458 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1459 ierr = PetscFree(jj);CHKERRQ(ierr); 1460 1461 /* store nonzero values */ 1462 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1463 count = 0; 1464 for (i=0; i<a->mbs; i++) { 1465 for (j=0; j<bs; j++) { 1466 for (k=a->i[i]; k<a->i[i+1]; k++) { 1467 for (l=0; l<bs; l++) { 1468 aa[count++] = a->a[bs2*k + l*bs + j]; 1469 } 1470 } 1471 } 1472 } 1473 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1474 ierr = PetscFree(aa);CHKERRQ(ierr); 1475 1476 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1477 if (file) { 1478 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1479 } 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1485 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1486 { 1487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1488 PetscErrorCode ierr; 1489 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1490 PetscViewerFormat format; 1491 1492 PetscFunctionBegin; 1493 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1494 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1495 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1496 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1497 Mat aij; 1498 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1499 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1500 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1501 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1502 PetscFunctionReturn(0); 1503 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1504 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1505 for (i=0; i<a->mbs; i++) { 1506 for (j=0; j<bs; j++) { 1507 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1508 for (k=a->i[i]; k<a->i[i+1]; k++) { 1509 for (l=0; l<bs; l++) { 1510 #if defined(PETSC_USE_COMPLEX) 1511 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1512 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1513 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1514 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1515 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1516 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1517 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1518 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1519 } 1520 #else 1521 if (a->a[bs2*k + l*bs + j] != 0.0) { 1522 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1523 } 1524 #endif 1525 } 1526 } 1527 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1528 } 1529 } 1530 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1531 } else { 1532 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1533 for (i=0; i<a->mbs; i++) { 1534 for (j=0; j<bs; j++) { 1535 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1536 for (k=a->i[i]; k<a->i[i+1]; k++) { 1537 for (l=0; l<bs; l++) { 1538 #if defined(PETSC_USE_COMPLEX) 1539 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1541 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1542 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1543 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1544 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } else { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1547 } 1548 #else 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1550 #endif 1551 } 1552 } 1553 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1554 } 1555 } 1556 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1557 } 1558 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #include <petscdraw.h> 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1565 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1566 { 1567 Mat A = (Mat) Aa; 1568 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1569 PetscErrorCode ierr; 1570 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1571 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1572 MatScalar *aa; 1573 PetscViewer viewer; 1574 PetscViewerFormat format; 1575 1576 PetscFunctionBegin; 1577 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1578 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1579 1580 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1581 1582 /* loop over matrix elements drawing boxes */ 1583 1584 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1585 color = PETSC_DRAW_BLUE; 1586 for (i=0,row=0; i<mbs; i++,row+=bs) { 1587 for (j=a->i[i]; j<a->i[i+1]; j++) { 1588 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1589 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1590 aa = a->a + j*bs2; 1591 for (k=0; k<bs; k++) { 1592 for (l=0; l<bs; l++) { 1593 if (PetscRealPart(*aa++) >= 0.) continue; 1594 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1595 } 1596 } 1597 } 1598 } 1599 color = PETSC_DRAW_CYAN; 1600 for (i=0,row=0; i<mbs; i++,row+=bs) { 1601 for (j=a->i[i]; j<a->i[i+1]; j++) { 1602 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1603 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1604 aa = a->a + j*bs2; 1605 for (k=0; k<bs; k++) { 1606 for (l=0; l<bs; l++) { 1607 if (PetscRealPart(*aa++) != 0.) continue; 1608 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1609 } 1610 } 1611 } 1612 } 1613 color = PETSC_DRAW_RED; 1614 for (i=0,row=0; i<mbs; i++,row+=bs) { 1615 for (j=a->i[i]; j<a->i[i+1]; j++) { 1616 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1617 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1618 aa = a->a + j*bs2; 1619 for (k=0; k<bs; k++) { 1620 for (l=0; l<bs; l++) { 1621 if (PetscRealPart(*aa++) <= 0.) continue; 1622 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1623 } 1624 } 1625 } 1626 } 1627 } else { 1628 /* use contour shading to indicate magnitude of values */ 1629 /* first determine max of all nonzero values */ 1630 PetscDraw popup; 1631 PetscReal scale,maxv = 0.0; 1632 1633 for (i=0; i<a->nz*a->bs2; i++) { 1634 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1635 } 1636 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1637 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1638 if (popup) { 1639 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1640 } 1641 for (i=0,row=0; i<mbs; i++,row+=bs) { 1642 for (j=a->i[i]; j<a->i[i+1]; j++) { 1643 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1644 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1645 aa = a->a + j*bs2; 1646 for (k=0; k<bs; k++) { 1647 for (l=0; l<bs; l++) { 1648 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1649 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1650 } 1651 } 1652 } 1653 } 1654 } 1655 PetscFunctionReturn(0); 1656 } 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1660 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1661 { 1662 PetscErrorCode ierr; 1663 PetscReal xl,yl,xr,yr,w,h; 1664 PetscDraw draw; 1665 PetscBool isnull; 1666 1667 PetscFunctionBegin; 1668 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1669 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1670 1671 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1672 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1673 xr += w; yr += h; xl = -w; yl = -h; 1674 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1675 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1676 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatView_SeqBAIJ" 1682 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1683 { 1684 PetscErrorCode ierr; 1685 PetscBool iascii,isbinary,isdraw; 1686 1687 PetscFunctionBegin; 1688 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1689 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1690 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1691 if (iascii) { 1692 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1693 } else if (isbinary) { 1694 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1695 } else if (isdraw) { 1696 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1697 } else { 1698 Mat B; 1699 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1700 ierr = MatView(B,viewer);CHKERRQ(ierr); 1701 ierr = MatDestroy(&B);CHKERRQ(ierr); 1702 } 1703 PetscFunctionReturn(0); 1704 } 1705 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1709 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1710 { 1711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1712 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1713 PetscInt *ai = a->i,*ailen = a->ilen; 1714 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1715 MatScalar *ap,*aa = a->a; 1716 1717 PetscFunctionBegin; 1718 for (k=0; k<m; k++) { /* loop over rows */ 1719 row = im[k]; brow = row/bs; 1720 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1721 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1722 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1723 nrow = ailen[brow]; 1724 for (l=0; l<n; l++) { /* loop over columns */ 1725 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1726 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1727 col = in[l]; 1728 bcol = col/bs; 1729 cidx = col%bs; 1730 ridx = row%bs; 1731 high = nrow; 1732 low = 0; /* assume unsorted */ 1733 while (high-low > 5) { 1734 t = (low+high)/2; 1735 if (rp[t] > bcol) high = t; 1736 else low = t; 1737 } 1738 for (i=low; i<high; i++) { 1739 if (rp[i] > bcol) break; 1740 if (rp[i] == bcol) { 1741 *v++ = ap[bs2*i+bs*cidx+ridx]; 1742 goto finished; 1743 } 1744 } 1745 *v++ = 0.0; 1746 finished:; 1747 } 1748 } 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1754 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1755 { 1756 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1757 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1758 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1759 PetscErrorCode ierr; 1760 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1761 PetscBool roworiented=a->roworiented; 1762 const PetscScalar *value = v; 1763 MatScalar *ap,*aa = a->a,*bap; 1764 1765 PetscFunctionBegin; 1766 if (roworiented) { 1767 stepval = (n-1)*bs; 1768 } else { 1769 stepval = (m-1)*bs; 1770 } 1771 for (k=0; k<m; k++) { /* loop over added rows */ 1772 row = im[k]; 1773 if (row < 0) continue; 1774 #if defined(PETSC_USE_DEBUG) 1775 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1776 #endif 1777 rp = aj + ai[row]; 1778 ap = aa + bs2*ai[row]; 1779 rmax = imax[row]; 1780 nrow = ailen[row]; 1781 low = 0; 1782 high = nrow; 1783 for (l=0; l<n; l++) { /* loop over added columns */ 1784 if (in[l] < 0) continue; 1785 #if defined(PETSC_USE_DEBUG) 1786 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1787 #endif 1788 col = in[l]; 1789 if (roworiented) { 1790 value = v + (k*(stepval+bs) + l)*bs; 1791 } else { 1792 value = v + (l*(stepval+bs) + k)*bs; 1793 } 1794 if (col <= lastcol) low = 0; 1795 else high = nrow; 1796 lastcol = col; 1797 while (high-low > 7) { 1798 t = (low+high)/2; 1799 if (rp[t] > col) high = t; 1800 else low = t; 1801 } 1802 for (i=low; i<high; i++) { 1803 if (rp[i] > col) break; 1804 if (rp[i] == col) { 1805 bap = ap + bs2*i; 1806 if (roworiented) { 1807 if (is == ADD_VALUES) { 1808 for (ii=0; ii<bs; ii++,value+=stepval) { 1809 for (jj=ii; jj<bs2; jj+=bs) { 1810 bap[jj] += *value++; 1811 } 1812 } 1813 } else { 1814 for (ii=0; ii<bs; ii++,value+=stepval) { 1815 for (jj=ii; jj<bs2; jj+=bs) { 1816 bap[jj] = *value++; 1817 } 1818 } 1819 } 1820 } else { 1821 if (is == ADD_VALUES) { 1822 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1823 for (jj=0; jj<bs; jj++) { 1824 bap[jj] += value[jj]; 1825 } 1826 bap += bs; 1827 } 1828 } else { 1829 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1830 for (jj=0; jj<bs; jj++) { 1831 bap[jj] = value[jj]; 1832 } 1833 bap += bs; 1834 } 1835 } 1836 } 1837 goto noinsert2; 1838 } 1839 } 1840 if (nonew == 1) goto noinsert2; 1841 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1842 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1843 N = nrow++ - 1; high++; 1844 /* shift up all the later entries in this row */ 1845 for (ii=N; ii>=i; ii--) { 1846 rp[ii+1] = rp[ii]; 1847 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1848 } 1849 if (N >= i) { 1850 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1851 } 1852 rp[i] = col; 1853 bap = ap + bs2*i; 1854 if (roworiented) { 1855 for (ii=0; ii<bs; ii++,value+=stepval) { 1856 for (jj=ii; jj<bs2; jj+=bs) { 1857 bap[jj] = *value++; 1858 } 1859 } 1860 } else { 1861 for (ii=0; ii<bs; ii++,value+=stepval) { 1862 for (jj=0; jj<bs; jj++) { 1863 *bap++ = *value++; 1864 } 1865 } 1866 } 1867 noinsert2:; 1868 low = i; 1869 } 1870 ailen[row] = nrow; 1871 } 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1877 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1878 { 1879 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1880 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1881 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1882 PetscErrorCode ierr; 1883 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1884 MatScalar *aa = a->a,*ap; 1885 PetscReal ratio=0.6; 1886 1887 PetscFunctionBegin; 1888 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1889 1890 if (m) rmax = ailen[0]; 1891 for (i=1; i<mbs; i++) { 1892 /* move each row back by the amount of empty slots (fshift) before it*/ 1893 fshift += imax[i-1] - ailen[i-1]; 1894 rmax = PetscMax(rmax,ailen[i]); 1895 if (fshift) { 1896 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1897 N = ailen[i]; 1898 for (j=0; j<N; j++) { 1899 ip[j-fshift] = ip[j]; 1900 1901 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1902 } 1903 } 1904 ai[i] = ai[i-1] + ailen[i-1]; 1905 } 1906 if (mbs) { 1907 fshift += imax[mbs-1] - ailen[mbs-1]; 1908 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1909 } 1910 1911 /* reset ilen and imax for each row */ 1912 a->nonzerorowcnt = 0; 1913 for (i=0; i<mbs; i++) { 1914 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1915 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1916 } 1917 a->nz = ai[mbs]; 1918 1919 /* diagonals may have moved, so kill the diagonal pointers */ 1920 a->idiagvalid = PETSC_FALSE; 1921 if (fshift && a->diag) { 1922 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1923 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1924 a->diag = 0; 1925 } 1926 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1927 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1928 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1929 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1930 1931 A->info.mallocs += a->reallocs; 1932 a->reallocs = 0; 1933 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1934 a->rmax = rmax; 1935 1936 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1937 PetscFunctionReturn(0); 1938 } 1939 1940 /* 1941 This function returns an array of flags which indicate the locations of contiguous 1942 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1943 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1944 Assume: sizes should be long enough to hold all the values. 1945 */ 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1948 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1949 { 1950 PetscInt i,j,k,row; 1951 PetscBool flg; 1952 1953 PetscFunctionBegin; 1954 for (i=0,j=0; i<n; j++) { 1955 row = idx[i]; 1956 if (row%bs!=0) { /* Not the begining of a block */ 1957 sizes[j] = 1; 1958 i++; 1959 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1960 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1961 i++; 1962 } else { /* Begining of the block, so check if the complete block exists */ 1963 flg = PETSC_TRUE; 1964 for (k=1; k<bs; k++) { 1965 if (row+k != idx[i+k]) { /* break in the block */ 1966 flg = PETSC_FALSE; 1967 break; 1968 } 1969 } 1970 if (flg) { /* No break in the bs */ 1971 sizes[j] = bs; 1972 i += bs; 1973 } else { 1974 sizes[j] = 1; 1975 i++; 1976 } 1977 } 1978 } 1979 *bs_max = j; 1980 PetscFunctionReturn(0); 1981 } 1982 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1985 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1986 { 1987 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1988 PetscErrorCode ierr; 1989 PetscInt i,j,k,count,*rows; 1990 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1991 PetscScalar zero = 0.0; 1992 MatScalar *aa; 1993 const PetscScalar *xx; 1994 PetscScalar *bb; 1995 1996 PetscFunctionBegin; 1997 /* fix right hand side if needed */ 1998 if (x && b) { 1999 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2000 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2001 for (i=0; i<is_n; i++) { 2002 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2003 } 2004 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2005 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2006 } 2007 2008 /* Make a copy of the IS and sort it */ 2009 /* allocate memory for rows,sizes */ 2010 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2011 2012 /* copy IS values to rows, and sort them */ 2013 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2014 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2015 2016 if (baij->keepnonzeropattern) { 2017 for (i=0; i<is_n; i++) sizes[i] = 1; 2018 bs_max = is_n; 2019 } else { 2020 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2021 A->nonzerostate++; 2022 } 2023 2024 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2025 row = rows[j]; 2026 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2027 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2028 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2029 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2030 if (diag != (PetscScalar)0.0) { 2031 if (baij->ilen[row/bs] > 0) { 2032 baij->ilen[row/bs] = 1; 2033 baij->j[baij->i[row/bs]] = row/bs; 2034 2035 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2036 } 2037 /* Now insert all the diagonal values for this bs */ 2038 for (k=0; k<bs; k++) { 2039 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2040 } 2041 } else { /* (diag == 0.0) */ 2042 baij->ilen[row/bs] = 0; 2043 } /* end (diag == 0.0) */ 2044 } else { /* (sizes[i] != bs) */ 2045 #if defined(PETSC_USE_DEBUG) 2046 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2047 #endif 2048 for (k=0; k<count; k++) { 2049 aa[0] = zero; 2050 aa += bs; 2051 } 2052 if (diag != (PetscScalar)0.0) { 2053 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2054 } 2055 } 2056 } 2057 2058 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2059 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 #undef __FUNCT__ 2064 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2065 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2066 { 2067 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2068 PetscErrorCode ierr; 2069 PetscInt i,j,k,count; 2070 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2071 PetscScalar zero = 0.0; 2072 MatScalar *aa; 2073 const PetscScalar *xx; 2074 PetscScalar *bb; 2075 PetscBool *zeroed,vecs = PETSC_FALSE; 2076 2077 PetscFunctionBegin; 2078 /* fix right hand side if needed */ 2079 if (x && b) { 2080 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2081 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2082 vecs = PETSC_TRUE; 2083 } 2084 2085 /* zero the columns */ 2086 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2087 for (i=0; i<is_n; i++) { 2088 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2089 zeroed[is_idx[i]] = PETSC_TRUE; 2090 } 2091 for (i=0; i<A->rmap->N; i++) { 2092 if (!zeroed[i]) { 2093 row = i/bs; 2094 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2095 for (k=0; k<bs; k++) { 2096 col = bs*baij->j[j] + k; 2097 if (zeroed[col]) { 2098 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2099 if (vecs) bb[i] -= aa[0]*xx[col]; 2100 aa[0] = 0.0; 2101 } 2102 } 2103 } 2104 } else if (vecs) bb[i] = diag*xx[i]; 2105 } 2106 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2107 if (vecs) { 2108 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2109 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2110 } 2111 2112 /* zero the rows */ 2113 for (i=0; i<is_n; i++) { 2114 row = is_idx[i]; 2115 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2116 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2117 for (k=0; k<count; k++) { 2118 aa[0] = zero; 2119 aa += bs; 2120 } 2121 if (diag != (PetscScalar)0.0) { 2122 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2123 } 2124 } 2125 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2126 PetscFunctionReturn(0); 2127 } 2128 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2131 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2132 { 2133 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2134 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2135 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2136 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2137 PetscErrorCode ierr; 2138 PetscInt ridx,cidx,bs2=a->bs2; 2139 PetscBool roworiented=a->roworiented; 2140 MatScalar *ap,value,*aa=a->a,*bap; 2141 2142 PetscFunctionBegin; 2143 for (k=0; k<m; k++) { /* loop over added rows */ 2144 row = im[k]; 2145 brow = row/bs; 2146 if (row < 0) continue; 2147 #if defined(PETSC_USE_DEBUG) 2148 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2149 #endif 2150 rp = aj + ai[brow]; 2151 ap = aa + bs2*ai[brow]; 2152 rmax = imax[brow]; 2153 nrow = ailen[brow]; 2154 low = 0; 2155 high = nrow; 2156 for (l=0; l<n; l++) { /* loop over added columns */ 2157 if (in[l] < 0) continue; 2158 #if defined(PETSC_USE_DEBUG) 2159 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2160 #endif 2161 col = in[l]; bcol = col/bs; 2162 ridx = row % bs; cidx = col % bs; 2163 if (roworiented) { 2164 value = v[l + k*n]; 2165 } else { 2166 value = v[k + l*m]; 2167 } 2168 if (col <= lastcol) low = 0; else high = nrow; 2169 lastcol = col; 2170 while (high-low > 7) { 2171 t = (low+high)/2; 2172 if (rp[t] > bcol) high = t; 2173 else low = t; 2174 } 2175 for (i=low; i<high; i++) { 2176 if (rp[i] > bcol) break; 2177 if (rp[i] == bcol) { 2178 bap = ap + bs2*i + bs*cidx + ridx; 2179 if (is == ADD_VALUES) *bap += value; 2180 else *bap = value; 2181 goto noinsert1; 2182 } 2183 } 2184 if (nonew == 1) goto noinsert1; 2185 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2186 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2187 N = nrow++ - 1; high++; 2188 /* shift up all the later entries in this row */ 2189 for (ii=N; ii>=i; ii--) { 2190 rp[ii+1] = rp[ii]; 2191 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2192 } 2193 if (N>=i) { 2194 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2195 } 2196 rp[i] = bcol; 2197 ap[bs2*i + bs*cidx + ridx] = value; 2198 a->nz++; 2199 A->nonzerostate++; 2200 noinsert1:; 2201 low = i; 2202 } 2203 ailen[brow] = nrow; 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2210 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2211 { 2212 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2213 Mat outA; 2214 PetscErrorCode ierr; 2215 PetscBool row_identity,col_identity; 2216 2217 PetscFunctionBegin; 2218 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2219 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2220 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2221 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2222 2223 outA = inA; 2224 inA->factortype = MAT_FACTOR_LU; 2225 2226 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2227 2228 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2229 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2230 a->row = row; 2231 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2232 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2233 a->col = col; 2234 2235 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2236 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2237 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2238 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2239 2240 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2241 if (!a->solve_work) { 2242 ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr); 2243 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2244 } 2245 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2246 PetscFunctionReturn(0); 2247 } 2248 2249 #undef __FUNCT__ 2250 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2251 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2252 { 2253 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2254 PetscInt i,nz,mbs; 2255 2256 PetscFunctionBegin; 2257 nz = baij->maxnz; 2258 mbs = baij->mbs; 2259 for (i=0; i<nz; i++) { 2260 baij->j[i] = indices[i]; 2261 } 2262 baij->nz = nz; 2263 for (i=0; i<mbs; i++) { 2264 baij->ilen[i] = baij->imax[i]; 2265 } 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2271 /*@ 2272 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2273 in the matrix. 2274 2275 Input Parameters: 2276 + mat - the SeqBAIJ matrix 2277 - indices - the column indices 2278 2279 Level: advanced 2280 2281 Notes: 2282 This can be called if you have precomputed the nonzero structure of the 2283 matrix and want to provide it to the matrix object to improve the performance 2284 of the MatSetValues() operation. 2285 2286 You MUST have set the correct numbers of nonzeros per row in the call to 2287 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2288 2289 MUST be called before any calls to MatSetValues(); 2290 2291 @*/ 2292 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2293 { 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2298 PetscValidPointer(indices,2); 2299 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2305 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2306 { 2307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2308 PetscErrorCode ierr; 2309 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2310 PetscReal atmp; 2311 PetscScalar *x,zero = 0.0; 2312 MatScalar *aa; 2313 PetscInt ncols,brow,krow,kcol; 2314 2315 PetscFunctionBegin; 2316 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2317 bs = A->rmap->bs; 2318 aa = a->a; 2319 ai = a->i; 2320 aj = a->j; 2321 mbs = a->mbs; 2322 2323 ierr = VecSet(v,zero);CHKERRQ(ierr); 2324 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2325 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2326 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2327 for (i=0; i<mbs; i++) { 2328 ncols = ai[1] - ai[0]; ai++; 2329 brow = bs*i; 2330 for (j=0; j<ncols; j++) { 2331 for (kcol=0; kcol<bs; kcol++) { 2332 for (krow=0; krow<bs; krow++) { 2333 atmp = PetscAbsScalar(*aa);aa++; 2334 row = brow + krow; /* row index */ 2335 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2336 } 2337 } 2338 aj++; 2339 } 2340 } 2341 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatCopy_SeqBAIJ" 2347 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2348 { 2349 PetscErrorCode ierr; 2350 2351 PetscFunctionBegin; 2352 /* If the two matrices have the same copy implementation, use fast copy. */ 2353 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2354 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2355 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2356 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2357 2358 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2359 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2360 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2361 } else { 2362 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2363 } 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2369 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2370 { 2371 PetscErrorCode ierr; 2372 2373 PetscFunctionBegin; 2374 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 #undef __FUNCT__ 2379 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2380 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2381 { 2382 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2383 2384 PetscFunctionBegin; 2385 *array = a->a; 2386 PetscFunctionReturn(0); 2387 } 2388 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2391 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2392 { 2393 PetscFunctionBegin; 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2399 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2400 { 2401 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2402 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2403 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2404 PetscErrorCode ierr; 2405 2406 PetscFunctionBegin; 2407 /* Set the number of nonzeros in the new matrix */ 2408 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2414 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2415 { 2416 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2417 PetscErrorCode ierr; 2418 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2419 PetscBLASInt one=1; 2420 2421 PetscFunctionBegin; 2422 if (str == SAME_NONZERO_PATTERN) { 2423 PetscScalar alpha = a; 2424 PetscBLASInt bnz; 2425 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2426 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2427 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2428 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2429 if (y->xtoy && y->XtoY != X) { 2430 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2431 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2432 } 2433 if (!y->xtoy) { /* get xtoy */ 2434 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2435 y->XtoY = X; 2436 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2437 } 2438 for (i=0; i<x->nz; i++) { 2439 j = 0; 2440 while (j < bs2) { 2441 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2442 j++; 2443 } 2444 } 2445 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2446 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(double)((PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2447 } else { 2448 Mat B; 2449 PetscInt *nnz; 2450 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2451 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2452 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2453 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2454 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2455 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2456 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2457 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2458 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2459 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2460 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2461 ierr = PetscFree(nnz);CHKERRQ(ierr); 2462 } 2463 PetscFunctionReturn(0); 2464 } 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2468 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2469 { 2470 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2471 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2472 MatScalar *aa = a->a; 2473 2474 PetscFunctionBegin; 2475 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 #undef __FUNCT__ 2480 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2481 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2482 { 2483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2484 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2485 MatScalar *aa = a->a; 2486 2487 PetscFunctionBegin; 2488 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2494 /* 2495 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2496 */ 2497 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2498 { 2499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2500 PetscErrorCode ierr; 2501 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2502 PetscInt nz = a->i[m],row,*jj,mr,col; 2503 2504 PetscFunctionBegin; 2505 *nn = n; 2506 if (!ia) PetscFunctionReturn(0); 2507 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2508 else { 2509 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2510 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2511 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2512 jj = a->j; 2513 for (i=0; i<nz; i++) { 2514 collengths[jj[i]]++; 2515 } 2516 cia[0] = oshift; 2517 for (i=0; i<n; i++) { 2518 cia[i+1] = cia[i] + collengths[i]; 2519 } 2520 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2521 jj = a->j; 2522 for (row=0; row<m; row++) { 2523 mr = a->i[row+1] - a->i[row]; 2524 for (i=0; i<mr; i++) { 2525 col = *jj++; 2526 2527 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2528 } 2529 } 2530 ierr = PetscFree(collengths);CHKERRQ(ierr); 2531 *ia = cia; *ja = cja; 2532 } 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2538 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2539 { 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 if (!ia) PetscFunctionReturn(0); 2544 ierr = PetscFree(*ia);CHKERRQ(ierr); 2545 ierr = PetscFree(*ja);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 /* 2550 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2551 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2552 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2553 */ 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2556 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2557 { 2558 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2559 PetscErrorCode ierr; 2560 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2561 PetscInt nz = a->i[m],row,*jj,mr,col; 2562 PetscInt *cspidx; 2563 2564 PetscFunctionBegin; 2565 *nn = n; 2566 if (!ia) PetscFunctionReturn(0); 2567 2568 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2569 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2570 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2571 ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr); 2572 jj = a->j; 2573 for (i=0; i<nz; i++) { 2574 collengths[jj[i]]++; 2575 } 2576 cia[0] = oshift; 2577 for (i=0; i<n; i++) { 2578 cia[i+1] = cia[i] + collengths[i]; 2579 } 2580 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2581 jj = a->j; 2582 for (row=0; row<m; row++) { 2583 mr = a->i[row+1] - a->i[row]; 2584 for (i=0; i<mr; i++) { 2585 col = *jj++; 2586 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2587 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2588 } 2589 } 2590 ierr = PetscFree(collengths);CHKERRQ(ierr); 2591 *ia = cia; *ja = cja; 2592 *spidx = cspidx; 2593 PetscFunctionReturn(0); 2594 } 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2598 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2599 { 2600 PetscErrorCode ierr; 2601 2602 PetscFunctionBegin; 2603 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2604 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2605 PetscFunctionReturn(0); 2606 } 2607 2608 /* -------------------------------------------------------------------*/ 2609 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2610 MatGetRow_SeqBAIJ, 2611 MatRestoreRow_SeqBAIJ, 2612 MatMult_SeqBAIJ_N, 2613 /* 4*/ MatMultAdd_SeqBAIJ_N, 2614 MatMultTranspose_SeqBAIJ, 2615 MatMultTransposeAdd_SeqBAIJ, 2616 0, 2617 0, 2618 0, 2619 /* 10*/ 0, 2620 MatLUFactor_SeqBAIJ, 2621 0, 2622 0, 2623 MatTranspose_SeqBAIJ, 2624 /* 15*/ MatGetInfo_SeqBAIJ, 2625 MatEqual_SeqBAIJ, 2626 MatGetDiagonal_SeqBAIJ, 2627 MatDiagonalScale_SeqBAIJ, 2628 MatNorm_SeqBAIJ, 2629 /* 20*/ 0, 2630 MatAssemblyEnd_SeqBAIJ, 2631 MatSetOption_SeqBAIJ, 2632 MatZeroEntries_SeqBAIJ, 2633 /* 24*/ MatZeroRows_SeqBAIJ, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /* 29*/ MatSetUp_SeqBAIJ, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /* 34*/ MatDuplicate_SeqBAIJ, 2644 0, 2645 0, 2646 MatILUFactor_SeqBAIJ, 2647 0, 2648 /* 39*/ MatAXPY_SeqBAIJ, 2649 MatGetSubMatrices_SeqBAIJ, 2650 MatIncreaseOverlap_SeqBAIJ, 2651 MatGetValues_SeqBAIJ, 2652 MatCopy_SeqBAIJ, 2653 /* 44*/ 0, 2654 MatScale_SeqBAIJ, 2655 0, 2656 0, 2657 MatZeroRowsColumns_SeqBAIJ, 2658 /* 49*/ 0, 2659 MatGetRowIJ_SeqBAIJ, 2660 MatRestoreRowIJ_SeqBAIJ, 2661 MatGetColumnIJ_SeqBAIJ, 2662 MatRestoreColumnIJ_SeqBAIJ, 2663 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2664 0, 2665 0, 2666 0, 2667 MatSetValuesBlocked_SeqBAIJ, 2668 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2669 MatDestroy_SeqBAIJ, 2670 MatView_SeqBAIJ, 2671 0, 2672 0, 2673 /* 64*/ 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2679 0, 2680 MatConvert_Basic, 2681 0, 2682 0, 2683 /* 74*/ 0, 2684 MatFDColoringApply_BAIJ, 2685 0, 2686 0, 2687 0, 2688 /* 79*/ 0, 2689 0, 2690 0, 2691 0, 2692 MatLoad_SeqBAIJ, 2693 /* 84*/ 0, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /* 89*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /* 94*/ 0, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /* 99*/ 0, 2709 0, 2710 0, 2711 0, 2712 0, 2713 /*104*/ 0, 2714 MatRealPart_SeqBAIJ, 2715 MatImaginaryPart_SeqBAIJ, 2716 0, 2717 0, 2718 /*109*/ 0, 2719 0, 2720 0, 2721 0, 2722 MatMissingDiagonal_SeqBAIJ, 2723 /*114*/ 0, 2724 0, 2725 0, 2726 0, 2727 0, 2728 /*119*/ 0, 2729 0, 2730 MatMultHermitianTranspose_SeqBAIJ, 2731 MatMultHermitianTransposeAdd_SeqBAIJ, 2732 0, 2733 /*124*/ 0, 2734 0, 2735 MatInvertBlockDiagonal_SeqBAIJ, 2736 0, 2737 0, 2738 /*129*/ 0, 2739 0, 2740 0, 2741 0, 2742 0, 2743 /*134*/ 0, 2744 0, 2745 0, 2746 0, 2747 0, 2748 /*139*/ 0, 2749 0, 2750 0, 2751 MatFDColoringSetUp_SeqXAIJ 2752 }; 2753 2754 #undef __FUNCT__ 2755 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2756 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2757 { 2758 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2759 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2760 PetscErrorCode ierr; 2761 2762 PetscFunctionBegin; 2763 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2764 2765 /* allocate space for values if not already there */ 2766 if (!aij->saved_values) { 2767 ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr); 2768 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2769 } 2770 2771 /* copy values over */ 2772 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2773 PetscFunctionReturn(0); 2774 } 2775 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2778 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2779 { 2780 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2781 PetscErrorCode ierr; 2782 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2783 2784 PetscFunctionBegin; 2785 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2786 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2787 2788 /* copy values over */ 2789 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2790 PetscFunctionReturn(0); 2791 } 2792 2793 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2794 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2798 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2799 { 2800 Mat_SeqBAIJ *b; 2801 PetscErrorCode ierr; 2802 PetscInt i,mbs,nbs,bs2; 2803 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2804 2805 PetscFunctionBegin; 2806 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2807 if (nz == MAT_SKIP_ALLOCATION) { 2808 skipallocation = PETSC_TRUE; 2809 nz = 0; 2810 } 2811 2812 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2813 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2814 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2815 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2816 2817 B->preallocated = PETSC_TRUE; 2818 2819 mbs = B->rmap->n/bs; 2820 nbs = B->cmap->n/bs; 2821 bs2 = bs*bs; 2822 2823 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2824 2825 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2826 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2827 if (nnz) { 2828 for (i=0; i<mbs; i++) { 2829 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2830 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2831 } 2832 } 2833 2834 b = (Mat_SeqBAIJ*)B->data; 2835 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2836 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2837 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2838 2839 if (!flg) { 2840 switch (bs) { 2841 case 1: 2842 B->ops->mult = MatMult_SeqBAIJ_1; 2843 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2844 break; 2845 case 2: 2846 B->ops->mult = MatMult_SeqBAIJ_2; 2847 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2848 break; 2849 case 3: 2850 B->ops->mult = MatMult_SeqBAIJ_3; 2851 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2852 break; 2853 case 4: 2854 B->ops->mult = MatMult_SeqBAIJ_4; 2855 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2856 break; 2857 case 5: 2858 B->ops->mult = MatMult_SeqBAIJ_5; 2859 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2860 break; 2861 case 6: 2862 B->ops->mult = MatMult_SeqBAIJ_6; 2863 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2864 break; 2865 case 7: 2866 B->ops->mult = MatMult_SeqBAIJ_7; 2867 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2868 break; 2869 case 15: 2870 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2872 break; 2873 default: 2874 B->ops->mult = MatMult_SeqBAIJ_N; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2876 break; 2877 } 2878 } 2879 B->ops->sor = MatSOR_SeqBAIJ; 2880 b->mbs = mbs; 2881 b->nbs = nbs; 2882 if (!skipallocation) { 2883 if (!b->imax) { 2884 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2885 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2886 2887 b->free_imax_ilen = PETSC_TRUE; 2888 } 2889 /* b->ilen will count nonzeros in each block row so far. */ 2890 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2891 if (!nnz) { 2892 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2893 else if (nz < 0) nz = 1; 2894 for (i=0; i<mbs; i++) b->imax[i] = nz; 2895 nz = nz*mbs; 2896 } else { 2897 nz = 0; 2898 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2899 } 2900 2901 /* allocate the matrix space */ 2902 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2903 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2904 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2905 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2906 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2907 2908 b->singlemalloc = PETSC_TRUE; 2909 b->i[0] = 0; 2910 for (i=1; i<mbs+1; i++) { 2911 b->i[i] = b->i[i-1] + b->imax[i-1]; 2912 } 2913 b->free_a = PETSC_TRUE; 2914 b->free_ij = PETSC_TRUE; 2915 #if defined(PETSC_THREADCOMM_ACTIVE) 2916 ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr); 2917 #endif 2918 } else { 2919 b->free_a = PETSC_FALSE; 2920 b->free_ij = PETSC_FALSE; 2921 } 2922 2923 b->bs2 = bs2; 2924 b->mbs = mbs; 2925 b->nz = 0; 2926 b->maxnz = nz; 2927 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2928 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2929 PetscFunctionReturn(0); 2930 } 2931 2932 #undef __FUNCT__ 2933 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2934 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2935 { 2936 PetscInt i,m,nz,nz_max=0,*nnz; 2937 PetscScalar *values=0; 2938 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2943 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2944 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2945 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2946 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2947 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2948 m = B->rmap->n/bs; 2949 2950 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2951 ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr); 2952 for (i=0; i<m; i++) { 2953 nz = ii[i+1]- ii[i]; 2954 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2955 nz_max = PetscMax(nz_max, nz); 2956 nnz[i] = nz; 2957 } 2958 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2959 ierr = PetscFree(nnz);CHKERRQ(ierr); 2960 2961 values = (PetscScalar*)V; 2962 if (!values) { 2963 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2964 } 2965 for (i=0; i<m; i++) { 2966 PetscInt ncols = ii[i+1] - ii[i]; 2967 const PetscInt *icols = jj + ii[i]; 2968 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2969 if (!roworiented) { 2970 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2971 } else { 2972 PetscInt j; 2973 for (j=0; j<ncols; j++) { 2974 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2975 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2976 } 2977 } 2978 } 2979 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2980 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2983 PetscFunctionReturn(0); 2984 } 2985 2986 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2987 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 2988 #if defined(PETSC_HAVE_MUMPS) 2989 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2990 #endif 2991 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 2992 2993 /*MC 2994 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2995 block sparse compressed row format. 2996 2997 Options Database Keys: 2998 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2999 3000 Level: beginner 3001 3002 .seealso: MatCreateSeqBAIJ() 3003 M*/ 3004 3005 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3006 3007 #undef __FUNCT__ 3008 #define __FUNCT__ "MatCreate_SeqBAIJ" 3009 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3010 { 3011 PetscErrorCode ierr; 3012 PetscMPIInt size; 3013 Mat_SeqBAIJ *b; 3014 3015 PetscFunctionBegin; 3016 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3017 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3018 3019 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3020 B->data = (void*)b; 3021 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3022 3023 b->row = 0; 3024 b->col = 0; 3025 b->icol = 0; 3026 b->reallocs = 0; 3027 b->saved_values = 0; 3028 3029 b->roworiented = PETSC_TRUE; 3030 b->nonew = 0; 3031 b->diag = 0; 3032 b->solve_work = 0; 3033 b->mult_work = 0; 3034 B->spptr = 0; 3035 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3036 b->keepnonzeropattern = PETSC_FALSE; 3037 b->xtoy = 0; 3038 b->XtoY = 0; 3039 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3043 #if defined(PETSC_HAVE_MUMPS) 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3045 #endif 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3056 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3057 PetscFunctionReturn(0); 3058 } 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3062 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3063 { 3064 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3065 PetscErrorCode ierr; 3066 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3067 3068 PetscFunctionBegin; 3069 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3070 3071 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3072 c->imax = a->imax; 3073 c->ilen = a->ilen; 3074 c->free_imax_ilen = PETSC_FALSE; 3075 } else { 3076 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3077 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3078 for (i=0; i<mbs; i++) { 3079 c->imax[i] = a->imax[i]; 3080 c->ilen[i] = a->ilen[i]; 3081 } 3082 c->free_imax_ilen = PETSC_TRUE; 3083 } 3084 3085 /* allocate the matrix space */ 3086 if (mallocmatspace) { 3087 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3088 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3089 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3090 3091 c->i = a->i; 3092 c->j = a->j; 3093 c->singlemalloc = PETSC_FALSE; 3094 c->free_a = PETSC_TRUE; 3095 c->free_ij = PETSC_FALSE; 3096 c->parent = A; 3097 C->preallocated = PETSC_TRUE; 3098 C->assembled = PETSC_TRUE; 3099 3100 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3101 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3102 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3103 } else { 3104 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3105 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3106 3107 c->singlemalloc = PETSC_TRUE; 3108 c->free_a = PETSC_TRUE; 3109 c->free_ij = PETSC_TRUE; 3110 3111 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3112 if (mbs > 0) { 3113 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3114 if (cpvalues == MAT_COPY_VALUES) { 3115 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3116 } else { 3117 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3118 } 3119 } 3120 C->preallocated = PETSC_TRUE; 3121 C->assembled = PETSC_TRUE; 3122 } 3123 } 3124 3125 c->roworiented = a->roworiented; 3126 c->nonew = a->nonew; 3127 3128 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3129 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3130 3131 c->bs2 = a->bs2; 3132 c->mbs = a->mbs; 3133 c->nbs = a->nbs; 3134 3135 if (a->diag) { 3136 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3137 c->diag = a->diag; 3138 c->free_diag = PETSC_FALSE; 3139 } else { 3140 ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr); 3141 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3142 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3143 c->free_diag = PETSC_TRUE; 3144 } 3145 } else c->diag = 0; 3146 3147 c->nz = a->nz; 3148 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3149 c->solve_work = 0; 3150 c->mult_work = 0; 3151 3152 c->compressedrow.use = a->compressedrow.use; 3153 c->compressedrow.nrows = a->compressedrow.nrows; 3154 if (a->compressedrow.use) { 3155 i = a->compressedrow.nrows; 3156 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3157 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3158 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3159 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3160 } else { 3161 c->compressedrow.use = PETSC_FALSE; 3162 c->compressedrow.i = NULL; 3163 c->compressedrow.rindex = NULL; 3164 } 3165 C->nonzerostate = A->nonzerostate; 3166 3167 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3168 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3169 PetscFunctionReturn(0); 3170 } 3171 3172 #undef __FUNCT__ 3173 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3174 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3175 { 3176 PetscErrorCode ierr; 3177 3178 PetscFunctionBegin; 3179 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3180 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3181 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3182 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3183 PetscFunctionReturn(0); 3184 } 3185 3186 #undef __FUNCT__ 3187 #define __FUNCT__ "MatLoad_SeqBAIJ" 3188 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3189 { 3190 Mat_SeqBAIJ *a; 3191 PetscErrorCode ierr; 3192 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3193 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3194 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3195 PetscInt *masked,nmask,tmp,bs2,ishift; 3196 PetscMPIInt size; 3197 int fd; 3198 PetscScalar *aa; 3199 MPI_Comm comm; 3200 3201 PetscFunctionBegin; 3202 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3203 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3204 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3205 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3206 bs2 = bs*bs; 3207 3208 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3209 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3210 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3211 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3212 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3213 M = header[1]; N = header[2]; nz = header[3]; 3214 3215 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3216 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3217 3218 /* 3219 This code adds extra rows to make sure the number of rows is 3220 divisible by the blocksize 3221 */ 3222 mbs = M/bs; 3223 extra_rows = bs - M + bs*(mbs); 3224 if (extra_rows == bs) extra_rows = 0; 3225 else mbs++; 3226 if (extra_rows) { 3227 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3228 } 3229 3230 /* Set global sizes if not already set */ 3231 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3232 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3233 } else { /* Check if the matrix global sizes are correct */ 3234 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3235 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3236 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3237 } 3238 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3239 } 3240 3241 /* read in row lengths */ 3242 ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr); 3243 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3244 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3245 3246 /* read in column indices */ 3247 ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr); 3248 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3249 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3250 3251 /* loop over row lengths determining block row lengths */ 3252 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3253 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3254 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3255 rowcount = 0; 3256 nzcount = 0; 3257 for (i=0; i<mbs; i++) { 3258 nmask = 0; 3259 for (j=0; j<bs; j++) { 3260 kmax = rowlengths[rowcount]; 3261 for (k=0; k<kmax; k++) { 3262 tmp = jj[nzcount++]/bs; 3263 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3264 } 3265 rowcount++; 3266 } 3267 browlengths[i] += nmask; 3268 /* zero out the mask elements we set */ 3269 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3270 } 3271 3272 /* Do preallocation */ 3273 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3274 a = (Mat_SeqBAIJ*)newmat->data; 3275 3276 /* set matrix "i" values */ 3277 a->i[0] = 0; 3278 for (i=1; i<= mbs; i++) { 3279 a->i[i] = a->i[i-1] + browlengths[i-1]; 3280 a->ilen[i-1] = browlengths[i-1]; 3281 } 3282 a->nz = 0; 3283 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3284 3285 /* read in nonzero values */ 3286 ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr); 3287 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3288 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3289 3290 /* set "a" and "j" values into matrix */ 3291 nzcount = 0; jcount = 0; 3292 for (i=0; i<mbs; i++) { 3293 nzcountb = nzcount; 3294 nmask = 0; 3295 for (j=0; j<bs; j++) { 3296 kmax = rowlengths[i*bs+j]; 3297 for (k=0; k<kmax; k++) { 3298 tmp = jj[nzcount++]/bs; 3299 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3300 } 3301 } 3302 /* sort the masked values */ 3303 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3304 3305 /* set "j" values into matrix */ 3306 maskcount = 1; 3307 for (j=0; j<nmask; j++) { 3308 a->j[jcount++] = masked[j]; 3309 mask[masked[j]] = maskcount++; 3310 } 3311 /* set "a" values into matrix */ 3312 ishift = bs2*a->i[i]; 3313 for (j=0; j<bs; j++) { 3314 kmax = rowlengths[i*bs+j]; 3315 for (k=0; k<kmax; k++) { 3316 tmp = jj[nzcountb]/bs; 3317 block = mask[tmp] - 1; 3318 point = jj[nzcountb] - bs*tmp; 3319 idx = ishift + bs2*block + j + bs*point; 3320 a->a[idx] = (MatScalar)aa[nzcountb++]; 3321 } 3322 } 3323 /* zero out the mask elements we set */ 3324 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3325 } 3326 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3327 3328 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3329 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3330 ierr = PetscFree(aa);CHKERRQ(ierr); 3331 ierr = PetscFree(jj);CHKERRQ(ierr); 3332 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3333 3334 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3335 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3336 PetscFunctionReturn(0); 3337 } 3338 3339 #undef __FUNCT__ 3340 #define __FUNCT__ "MatCreateSeqBAIJ" 3341 /*@C 3342 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3343 compressed row) format. For good matrix assembly performance the 3344 user should preallocate the matrix storage by setting the parameter nz 3345 (or the array nnz). By setting these parameters accurately, performance 3346 during matrix assembly can be increased by more than a factor of 50. 3347 3348 Collective on MPI_Comm 3349 3350 Input Parameters: 3351 + comm - MPI communicator, set to PETSC_COMM_SELF 3352 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3353 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3354 . m - number of rows 3355 . n - number of columns 3356 . nz - number of nonzero blocks per block row (same for all rows) 3357 - nnz - array containing the number of nonzero blocks in the various block rows 3358 (possibly different for each block row) or NULL 3359 3360 Output Parameter: 3361 . A - the matrix 3362 3363 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3364 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3365 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3366 3367 Options Database Keys: 3368 . -mat_no_unroll - uses code that does not unroll the loops in the 3369 block calculations (much slower) 3370 . -mat_block_size - size of the blocks to use 3371 3372 Level: intermediate 3373 3374 Notes: 3375 The number of rows and columns must be divisible by blocksize. 3376 3377 If the nnz parameter is given then the nz parameter is ignored 3378 3379 A nonzero block is any block that as 1 or more nonzeros in it 3380 3381 The block AIJ format is fully compatible with standard Fortran 77 3382 storage. That is, the stored row and column indices can begin at 3383 either one (as in Fortran) or zero. See the users' manual for details. 3384 3385 Specify the preallocated storage with either nz or nnz (not both). 3386 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3387 allocation. See Users-Manual: ch_mat for details. 3388 matrices. 3389 3390 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3391 @*/ 3392 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3393 { 3394 PetscErrorCode ierr; 3395 3396 PetscFunctionBegin; 3397 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3398 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3399 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3400 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3401 PetscFunctionReturn(0); 3402 } 3403 3404 #undef __FUNCT__ 3405 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3406 /*@C 3407 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3408 per row in the matrix. For good matrix assembly performance the 3409 user should preallocate the matrix storage by setting the parameter nz 3410 (or the array nnz). By setting these parameters accurately, performance 3411 during matrix assembly can be increased by more than a factor of 50. 3412 3413 Collective on MPI_Comm 3414 3415 Input Parameters: 3416 + B - the matrix 3417 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3418 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3419 . nz - number of block nonzeros per block row (same for all rows) 3420 - nnz - array containing the number of block nonzeros in the various block rows 3421 (possibly different for each block row) or NULL 3422 3423 Options Database Keys: 3424 . -mat_no_unroll - uses code that does not unroll the loops in the 3425 block calculations (much slower) 3426 . -mat_block_size - size of the blocks to use 3427 3428 Level: intermediate 3429 3430 Notes: 3431 If the nnz parameter is given then the nz parameter is ignored 3432 3433 You can call MatGetInfo() to get information on how effective the preallocation was; 3434 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3435 You can also run with the option -info and look for messages with the string 3436 malloc in them to see if additional memory allocation was needed. 3437 3438 The block AIJ format is fully compatible with standard Fortran 77 3439 storage. That is, the stored row and column indices can begin at 3440 either one (as in Fortran) or zero. See the users' manual for details. 3441 3442 Specify the preallocated storage with either nz or nnz (not both). 3443 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3444 allocation. See Users-Manual: ch_mat for details. 3445 3446 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3447 @*/ 3448 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3449 { 3450 PetscErrorCode ierr; 3451 3452 PetscFunctionBegin; 3453 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3454 PetscValidType(B,1); 3455 PetscValidLogicalCollectiveInt(B,bs,2); 3456 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3457 PetscFunctionReturn(0); 3458 } 3459 3460 #undef __FUNCT__ 3461 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3462 /*@C 3463 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3464 (the default sequential PETSc format). 3465 3466 Collective on MPI_Comm 3467 3468 Input Parameters: 3469 + B - the matrix 3470 . i - the indices into j for the start of each local row (starts with zero) 3471 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3472 - v - optional values in the matrix 3473 3474 Level: developer 3475 3476 Notes: 3477 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3478 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3479 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3480 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3481 block column and the second index is over columns within a block. 3482 3483 .keywords: matrix, aij, compressed row, sparse 3484 3485 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3486 @*/ 3487 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3488 { 3489 PetscErrorCode ierr; 3490 3491 PetscFunctionBegin; 3492 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3493 PetscValidType(B,1); 3494 PetscValidLogicalCollectiveInt(B,bs,2); 3495 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3496 PetscFunctionReturn(0); 3497 } 3498 3499 3500 #undef __FUNCT__ 3501 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3502 /*@ 3503 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3504 3505 Collective on MPI_Comm 3506 3507 Input Parameters: 3508 + comm - must be an MPI communicator of size 1 3509 . bs - size of block 3510 . m - number of rows 3511 . n - number of columns 3512 . i - row indices 3513 . j - column indices 3514 - a - matrix values 3515 3516 Output Parameter: 3517 . mat - the matrix 3518 3519 Level: advanced 3520 3521 Notes: 3522 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3523 once the matrix is destroyed 3524 3525 You cannot set new nonzero locations into this matrix, that will generate an error. 3526 3527 The i and j indices are 0 based 3528 3529 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3530 3531 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3532 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3533 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3534 with column-major ordering within blocks. 3535 3536 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3537 3538 @*/ 3539 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3540 { 3541 PetscErrorCode ierr; 3542 PetscInt ii; 3543 Mat_SeqBAIJ *baij; 3544 3545 PetscFunctionBegin; 3546 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3547 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3548 3549 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3550 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3551 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3552 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3553 baij = (Mat_SeqBAIJ*)(*mat)->data; 3554 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3555 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3556 3557 baij->i = i; 3558 baij->j = j; 3559 baij->a = a; 3560 3561 baij->singlemalloc = PETSC_FALSE; 3562 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3563 baij->free_a = PETSC_FALSE; 3564 baij->free_ij = PETSC_FALSE; 3565 3566 for (ii=0; ii<m; ii++) { 3567 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3568 #if defined(PETSC_USE_DEBUG) 3569 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3570 #endif 3571 } 3572 #if defined(PETSC_USE_DEBUG) 3573 for (ii=0; ii<baij->i[m]; ii++) { 3574 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3575 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3576 } 3577 #endif 3578 3579 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3580 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3581 PetscFunctionReturn(0); 3582 } 3583