1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr); 156 } 157 work = a->mult_work; 158 t = work + k+1; 159 if (!a->sor_work) { 160 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 161 } 162 w = a->sor_work; 163 164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 165 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 166 167 if (flag & SOR_ZERO_INITIAL_GUESS) { 168 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 169 switch (bs) { 170 case 1: 171 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 172 t[0] = b[0]; 173 i2 = 1; 174 idiag += 1; 175 for (i=1; i<m; i++) { 176 v = aa + ai[i]; 177 vi = aj + ai[i]; 178 nz = diag[i] - ai[i]; 179 s[0] = b[i2]; 180 for (j=0; j<nz; j++) { 181 xw[0] = x[vi[j]]; 182 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 183 } 184 t[i2] = s[0]; 185 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 186 x[i2] = xw[0]; 187 idiag += 1; 188 i2 += 1; 189 } 190 break; 191 case 2: 192 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 193 t[0] = b[0]; t[1] = b[1]; 194 i2 = 2; 195 idiag += 4; 196 for (i=1; i<m; i++) { 197 v = aa + 4*ai[i]; 198 vi = aj + ai[i]; 199 nz = diag[i] - ai[i]; 200 s[0] = b[i2]; s[1] = b[i2+1]; 201 for (j=0; j<nz; j++) { 202 idx = 2*vi[j]; 203 it = 4*j; 204 xw[0] = x[idx]; xw[1] = x[1+idx]; 205 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 206 } 207 t[i2] = s[0]; t[i2+1] = s[1]; 208 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 209 x[i2] = xw[0]; x[i2+1] = xw[1]; 210 idiag += 4; 211 i2 += 2; 212 } 213 break; 214 case 3: 215 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 216 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 217 i2 = 3; 218 idiag += 9; 219 for (i=1; i<m; i++) { 220 v = aa + 9*ai[i]; 221 vi = aj + ai[i]; 222 nz = diag[i] - ai[i]; 223 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 224 while (nz--) { 225 idx = 3*(*vi++); 226 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 228 v += 9; 229 } 230 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 231 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 232 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 233 idiag += 9; 234 i2 += 3; 235 } 236 break; 237 case 4: 238 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 239 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 240 i2 = 4; 241 idiag += 16; 242 for (i=1; i<m; i++) { 243 v = aa + 16*ai[i]; 244 vi = aj + ai[i]; 245 nz = diag[i] - ai[i]; 246 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 247 while (nz--) { 248 idx = 4*(*vi++); 249 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 250 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 251 v += 16; 252 } 253 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 254 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 255 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 256 idiag += 16; 257 i2 += 4; 258 } 259 break; 260 case 5: 261 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 262 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 263 i2 = 5; 264 idiag += 25; 265 for (i=1; i<m; i++) { 266 v = aa + 25*ai[i]; 267 vi = aj + ai[i]; 268 nz = diag[i] - ai[i]; 269 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 270 while (nz--) { 271 idx = 5*(*vi++); 272 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 273 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 274 v += 25; 275 } 276 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 277 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 278 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 279 idiag += 25; 280 i2 += 5; 281 } 282 break; 283 case 6: 284 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 285 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 286 i2 = 6; 287 idiag += 36; 288 for (i=1; i<m; i++) { 289 v = aa + 36*ai[i]; 290 vi = aj + ai[i]; 291 nz = diag[i] - ai[i]; 292 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 293 while (nz--) { 294 idx = 6*(*vi++); 295 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 296 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 298 v += 36; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 301 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 302 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 303 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 304 idiag += 36; 305 i2 += 6; 306 } 307 break; 308 case 7: 309 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 310 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 311 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 312 i2 = 7; 313 idiag += 49; 314 for (i=1; i<m; i++) { 315 v = aa + 49*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 319 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 320 while (nz--) { 321 idx = 7*(*vi++); 322 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 323 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 324 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 325 v += 49; 326 } 327 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 328 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 329 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 331 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 332 idiag += 49; 333 i2 += 7; 334 } 335 break; 336 default: 337 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 338 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 339 i2 = bs; 340 idiag += bs2; 341 for (i=1; i<m; i++) { 342 v = aa + bs2*ai[i]; 343 vi = aj + ai[i]; 344 nz = diag[i] - ai[i]; 345 346 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 347 /* copy all rows of x that are needed into contiguous space */ 348 workt = work; 349 for (j=0; j<nz; j++) { 350 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 351 workt += bs; 352 } 353 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 354 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 356 357 idiag += bs2; 358 i2 += bs; 359 } 360 break; 361 } 362 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 363 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 364 xb = t; 365 } 366 else xb = b; 367 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 368 idiag = a->idiag+bs2*(a->mbs-1); 369 i2 = bs * (m-1); 370 switch (bs) { 371 case 1: 372 s[0] = xb[i2]; 373 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 374 x[i2] = xw[0]; 375 i2 -= 1; 376 for (i=m-2; i>=0; i--) { 377 v = aa + (diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s[0] = xb[i2]; 381 for (j=0; j<nz; j++) { 382 xw[0] = x[vi[j]]; 383 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 384 } 385 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 386 x[i2] = xw[0]; 387 idiag -= 1; 388 i2 -= 1; 389 } 390 break; 391 case 2: 392 s[0] = xb[i2]; s[1] = xb[i2+1]; 393 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 394 x[i2] = xw[0]; x[i2+1] = xw[1]; 395 i2 -= 2; 396 idiag -= 4; 397 for (i=m-2; i>=0; i--) { 398 v = aa + 4*(diag[i] + 1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 for (j=0; j<nz; j++) { 403 idx = 2*vi[j]; 404 it = 4*j; 405 xw[0] = x[idx]; xw[1] = x[1+idx]; 406 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 407 } 408 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 409 x[i2] = xw[0]; x[i2+1] = xw[1]; 410 idiag -= 4; 411 i2 -= 2; 412 } 413 break; 414 case 3: 415 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 416 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 417 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 418 i2 -= 3; 419 idiag -= 9; 420 for (i=m-2; i>=0; i--) { 421 v = aa + 9*(diag[i]+1); 422 vi = aj + diag[i] + 1; 423 nz = ai[i+1] - diag[i] - 1; 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 while (nz--) { 426 idx = 3*(*vi++); 427 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 429 v += 9; 430 } 431 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 432 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 433 idiag -= 9; 434 i2 -= 3; 435 } 436 break; 437 case 4: 438 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 439 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 440 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 441 i2 -= 4; 442 idiag -= 16; 443 for (i=m-2; i>=0; i--) { 444 v = aa + 16*(diag[i]+1); 445 vi = aj + diag[i] + 1; 446 nz = ai[i+1] - diag[i] - 1; 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 while (nz--) { 449 idx = 4*(*vi++); 450 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 451 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 452 v += 16; 453 } 454 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 455 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 456 idiag -= 16; 457 i2 -= 4; 458 } 459 break; 460 case 5: 461 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 462 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 463 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 464 i2 -= 5; 465 idiag -= 25; 466 for (i=m-2; i>=0; i--) { 467 v = aa + 25*(diag[i]+1); 468 vi = aj + diag[i] + 1; 469 nz = ai[i+1] - diag[i] - 1; 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 while (nz--) { 472 idx = 5*(*vi++); 473 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 474 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 475 v += 25; 476 } 477 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 478 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 479 idiag -= 25; 480 i2 -= 5; 481 } 482 break; 483 case 6: 484 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 485 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 486 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 487 i2 -= 6; 488 idiag -= 36; 489 for (i=m-2; i>=0; i--) { 490 v = aa + 36*(diag[i]+1); 491 vi = aj + diag[i] + 1; 492 nz = ai[i+1] - diag[i] - 1; 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 while (nz--) { 495 idx = 6*(*vi++); 496 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 497 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 499 v += 36; 500 } 501 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 503 idiag -= 36; 504 i2 -= 6; 505 } 506 break; 507 case 7: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 509 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 510 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 512 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 513 i2 -= 7; 514 idiag -= 49; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 49*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 520 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 521 while (nz--) { 522 idx = 7*(*vi++); 523 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 524 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 525 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 526 v += 49; 527 } 528 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 529 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 530 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 531 idiag -= 49; 532 i2 -= 7; 533 } 534 break; 535 default: 536 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 537 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 538 i2 -= bs; 539 idiag -= bs2; 540 for (i=m-2; i>=0; i--) { 541 v = aa + bs2*(diag[i]+1); 542 vi = aj + diag[i] + 1; 543 nz = ai[i+1] - diag[i] - 1; 544 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 /* copy all rows of x that are needed into contiguous space */ 547 workt = work; 548 for (j=0; j<nz; j++) { 549 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 workt += bs; 551 } 552 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 553 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 554 555 idiag -= bs2; 556 i2 -= bs; 557 } 558 break; 559 } 560 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 561 } 562 its--; 563 } 564 while (its--) { 565 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 566 idiag = a->idiag; 567 i2 = 0; 568 switch (bs) { 569 case 1: 570 for (i=0; i<m; i++) { 571 v = aa + ai[i]; 572 vi = aj + ai[i]; 573 nz = ai[i+1] - ai[i]; 574 s[0] = b[i2]; 575 for (j=0; j<nz; j++) { 576 xw[0] = x[vi[j]]; 577 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 578 } 579 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 580 x[i2] += xw[0]; 581 idiag += 1; 582 i2 += 1; 583 } 584 break; 585 case 2: 586 for (i=0; i<m; i++) { 587 v = aa + 4*ai[i]; 588 vi = aj + ai[i]; 589 nz = ai[i+1] - ai[i]; 590 s[0] = b[i2]; s[1] = b[i2+1]; 591 for (j=0; j<nz; j++) { 592 idx = 2*vi[j]; 593 it = 4*j; 594 xw[0] = x[idx]; xw[1] = x[1+idx]; 595 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 596 } 597 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 598 x[i2] += xw[0]; x[i2+1] += xw[1]; 599 idiag += 4; 600 i2 += 2; 601 } 602 break; 603 case 3: 604 for (i=0; i<m; i++) { 605 v = aa + 9*ai[i]; 606 vi = aj + ai[i]; 607 nz = ai[i+1] - ai[i]; 608 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 609 while (nz--) { 610 idx = 3*(*vi++); 611 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 612 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 613 v += 9; 614 } 615 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 616 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 617 idiag += 9; 618 i2 += 3; 619 } 620 break; 621 case 4: 622 for (i=0; i<m; i++) { 623 v = aa + 16*ai[i]; 624 vi = aj + ai[i]; 625 nz = ai[i+1] - ai[i]; 626 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 627 while (nz--) { 628 idx = 4*(*vi++); 629 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 630 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 631 v += 16; 632 } 633 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 634 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 635 idiag += 16; 636 i2 += 4; 637 } 638 break; 639 case 5: 640 for (i=0; i<m; i++) { 641 v = aa + 25*ai[i]; 642 vi = aj + ai[i]; 643 nz = ai[i+1] - ai[i]; 644 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 645 while (nz--) { 646 idx = 5*(*vi++); 647 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 648 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 649 v += 25; 650 } 651 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 652 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 653 idiag += 25; 654 i2 += 5; 655 } 656 break; 657 case 6: 658 for (i=0; i<m; i++) { 659 v = aa + 36*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 663 while (nz--) { 664 idx = 6*(*vi++); 665 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 666 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 668 v += 36; 669 } 670 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 673 idiag += 36; 674 i2 += 6; 675 } 676 break; 677 case 7: 678 for (i=0; i<m; i++) { 679 v = aa + 49*ai[i]; 680 vi = aj + ai[i]; 681 nz = ai[i+1] - ai[i]; 682 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 683 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 684 while (nz--) { 685 idx = 7*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 689 v += 49; 690 } 691 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 692 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 693 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 694 idiag += 49; 695 i2 += 7; 696 } 697 break; 698 default: 699 for (i=0; i<m; i++) { 700 v = aa + bs2*ai[i]; 701 vi = aj + ai[i]; 702 nz = ai[i+1] - ai[i]; 703 704 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 705 /* copy all rows of x that are needed into contiguous space */ 706 workt = work; 707 for (j=0; j<nz; j++) { 708 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 709 workt += bs; 710 } 711 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 712 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 713 714 idiag += bs2; 715 i2 += bs; 716 } 717 break; 718 } 719 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 720 } 721 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 722 idiag = a->idiag+bs2*(a->mbs-1); 723 i2 = bs * (m-1); 724 switch (bs) { 725 case 1: 726 for (i=m-1; i>=0; i--) { 727 v = aa + ai[i]; 728 vi = aj + ai[i]; 729 nz = ai[i+1] - ai[i]; 730 s[0] = b[i2]; 731 for (j=0; j<nz; j++) { 732 xw[0] = x[vi[j]]; 733 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 734 } 735 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 736 x[i2] += xw[0]; 737 idiag -= 1; 738 i2 -= 1; 739 } 740 break; 741 case 2: 742 for (i=m-1; i>=0; i--) { 743 v = aa + 4*ai[i]; 744 vi = aj + ai[i]; 745 nz = ai[i+1] - ai[i]; 746 s[0] = b[i2]; s[1] = b[i2+1]; 747 for (j=0; j<nz; j++) { 748 idx = 2*vi[j]; 749 it = 4*j; 750 xw[0] = x[idx]; xw[1] = x[1+idx]; 751 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 752 } 753 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 754 x[i2] += xw[0]; x[i2+1] += xw[1]; 755 idiag -= 4; 756 i2 -= 2; 757 } 758 break; 759 case 3: 760 for (i=m-1; i>=0; i--) { 761 v = aa + 9*ai[i]; 762 vi = aj + ai[i]; 763 nz = ai[i+1] - ai[i]; 764 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 765 while (nz--) { 766 idx = 3*(*vi++); 767 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 768 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 769 v += 9; 770 } 771 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 772 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 773 idiag -= 9; 774 i2 -= 3; 775 } 776 break; 777 case 4: 778 for (i=m-1; i>=0; i--) { 779 v = aa + 16*ai[i]; 780 vi = aj + ai[i]; 781 nz = ai[i+1] - ai[i]; 782 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 783 while (nz--) { 784 idx = 4*(*vi++); 785 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 786 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 787 v += 16; 788 } 789 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 790 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 791 idiag -= 16; 792 i2 -= 4; 793 } 794 break; 795 case 5: 796 for (i=m-1; i>=0; i--) { 797 v = aa + 25*ai[i]; 798 vi = aj + ai[i]; 799 nz = ai[i+1] - ai[i]; 800 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 801 while (nz--) { 802 idx = 5*(*vi++); 803 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 804 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 805 v += 25; 806 } 807 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 808 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 809 idiag -= 25; 810 i2 -= 5; 811 } 812 break; 813 case 6: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 36*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 819 while (nz--) { 820 idx = 6*(*vi++); 821 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 822 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 824 v += 36; 825 } 826 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 829 idiag -= 36; 830 i2 -= 6; 831 } 832 break; 833 case 7: 834 for (i=m-1; i>=0; i--) { 835 v = aa + 49*ai[i]; 836 vi = aj + ai[i]; 837 nz = ai[i+1] - ai[i]; 838 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 839 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 844 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 845 v += 49; 846 } 847 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 848 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 849 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 850 idiag -= 49; 851 i2 -= 7; 852 } 853 break; 854 default: 855 for (i=m-1; i>=0; i--) { 856 v = aa + bs2*ai[i]; 857 vi = aj + ai[i]; 858 nz = ai[i+1] - ai[i]; 859 860 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 861 /* copy all rows of x that are needed into contiguous space */ 862 workt = work; 863 for (j=0; j<nz; j++) { 864 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 workt += bs; 866 } 867 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 868 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 869 870 idiag -= bs2; 871 i2 -= bs; 872 } 873 break; 874 } 875 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 876 } 877 } 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 884 /* 885 Special version for direct calls from Fortran (Used in PETSc-fun3d) 886 */ 887 #if defined(PETSC_HAVE_FORTRAN_CAPS) 888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 890 #define matsetvaluesblocked4_ matsetvaluesblocked4 891 #endif 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "matsetvaluesblocked4_" 895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 896 { 897 Mat A = *AA; 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 899 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 900 PetscInt *ai =a->i,*ailen=a->ilen; 901 PetscInt *aj =a->j,stepval,lastcol = -1; 902 const PetscScalar *value = v; 903 MatScalar *ap,*aa = a->a,*bap; 904 905 PetscFunctionBegin; 906 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 907 stepval = (n-1)*4; 908 for (k=0; k<m; k++) { /* loop over added rows */ 909 row = im[k]; 910 rp = aj + ai[row]; 911 ap = aa + 16*ai[row]; 912 nrow = ailen[row]; 913 low = 0; 914 high = nrow; 915 for (l=0; l<n; l++) { /* loop over added columns */ 916 col = in[l]; 917 if (col <= lastcol) low = 0; 918 else high = nrow; 919 lastcol = col; 920 value = v + k*(stepval+4 + l)*4; 921 while (high-low > 7) { 922 t = (low+high)/2; 923 if (rp[t] > col) high = t; 924 else low = t; 925 } 926 for (i=low; i<high; i++) { 927 if (rp[i] > col) break; 928 if (rp[i] == col) { 929 bap = ap + 16*i; 930 for (ii=0; ii<4; ii++,value+=stepval) { 931 for (jj=ii; jj<16; jj+=4) { 932 bap[jj] += *value++; 933 } 934 } 935 goto noinsert2; 936 } 937 } 938 N = nrow++ - 1; 939 high++; /* added new column index thus must search to one higher than before */ 940 /* shift up all the later entries in this row */ 941 for (ii=N; ii>=i; ii--) { 942 rp[ii+1] = rp[ii]; 943 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 944 } 945 if (N >= i) { 946 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 947 } 948 rp[i] = col; 949 bap = ap + 16*i; 950 for (ii=0; ii<4; ii++,value+=stepval) { 951 for (jj=ii; jj<16; jj+=4) { 952 bap[jj] = *value++; 953 } 954 } 955 noinsert2:; 956 low = i; 957 } 958 ailen[row] = nrow; 959 } 960 PetscFunctionReturnVoid(); 961 } 962 963 #if defined(PETSC_HAVE_FORTRAN_CAPS) 964 #define matsetvalues4_ MATSETVALUES4 965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 966 #define matsetvalues4_ matsetvalues4 967 #endif 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSetValues4_" 971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 972 { 973 Mat A = *AA; 974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 975 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 976 PetscInt *ai=a->i,*ailen=a->ilen; 977 PetscInt *aj=a->j,brow,bcol; 978 PetscInt ridx,cidx,lastcol = -1; 979 MatScalar *ap,value,*aa=a->a,*bap; 980 981 PetscFunctionBegin; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; brow = row/4; 984 rp = aj + ai[brow]; 985 ap = aa + 16*ai[brow]; 986 nrow = ailen[brow]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; bcol = col/4; 991 ridx = row % 4; cidx = col % 4; 992 value = v[l + k*n]; 993 if (col <= lastcol) low = 0; 994 else high = nrow; 995 lastcol = col; 996 while (high-low > 7) { 997 t = (low+high)/2; 998 if (rp[t] > bcol) high = t; 999 else low = t; 1000 } 1001 for (i=low; i<high; i++) { 1002 if (rp[i] > bcol) break; 1003 if (rp[i] == bcol) { 1004 bap = ap + 16*i + 4*cidx + ridx; 1005 *bap += value; 1006 goto noinsert1; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N>=i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = bcol; 1020 ap[16*i + 4*cidx + ridx] = value; 1021 noinsert1:; 1022 low = i; 1023 } 1024 ailen[brow] = nrow; 1025 } 1026 PetscFunctionReturnVoid(); 1027 } 1028 1029 /* 1030 Checks for missing diagonals 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*jj = a->j,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !jj) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1047 } else { 1048 diag = a->diag; 1049 for (i=0; i<a->mbs; i++) { 1050 if (jj[diag[i]] != i) { 1051 *missing = PETSC_TRUE; 1052 if (d) *d = i; 1053 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1054 break; 1055 } 1056 } 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = 0; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1176 { 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (!ia) PetscFunctionReturn(0); 1181 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1182 ierr = PetscFree(*ia);CHKERRQ(ierr); 1183 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1191 { 1192 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1198 #endif 1199 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1200 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1201 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1202 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1203 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1204 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1205 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1206 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1207 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1208 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1209 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1210 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1211 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1212 1213 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1214 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1215 ierr = PetscFree(A->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1234 { 1235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 switch (op) { 1240 case MAT_ROW_ORIENTED: 1241 a->roworiented = flg; 1242 break; 1243 case MAT_KEEP_NONZERO_PATTERN: 1244 a->keepnonzeropattern = flg; 1245 break; 1246 case MAT_NEW_NONZERO_LOCATIONS: 1247 a->nonew = (flg ? 0 : 1); 1248 break; 1249 case MAT_NEW_NONZERO_LOCATION_ERR: 1250 a->nonew = (flg ? -1 : 0); 1251 break; 1252 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1253 a->nonew = (flg ? -2 : 0); 1254 break; 1255 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1256 a->nounused = (flg ? -1 : 0); 1257 break; 1258 case MAT_NEW_DIAGONALS: 1259 case MAT_IGNORE_OFF_PROC_ENTRIES: 1260 case MAT_USE_HASH_TABLE: 1261 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1262 break; 1263 case MAT_SPD: 1264 case MAT_SYMMETRIC: 1265 case MAT_STRUCTURALLY_SYMMETRIC: 1266 case MAT_HERMITIAN: 1267 case MAT_SYMMETRY_ETERNAL: 1268 /* These options are handled directly by MatSetOption() */ 1269 break; 1270 default: 1271 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1278 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1283 MatScalar *aa,*aa_i; 1284 PetscScalar *v_i; 1285 1286 PetscFunctionBegin; 1287 bs = A->rmap->bs; 1288 ai = a->i; 1289 aj = a->j; 1290 aa = a->a; 1291 bs2 = a->bs2; 1292 1293 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1294 1295 bn = row/bs; /* Block number */ 1296 bp = row % bs; /* Block Position */ 1297 M = ai[bn+1] - ai[bn]; 1298 *nz = bs*M; 1299 1300 if (v) { 1301 *v = 0; 1302 if (*nz) { 1303 ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr); 1304 for (i=0; i<M; i++) { /* for each block in the block row */ 1305 v_i = *v + i*bs; 1306 aa_i = aa + bs2*(ai[bn] + i); 1307 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1308 } 1309 } 1310 } 1311 1312 if (idx) { 1313 *idx = 0; 1314 if (*nz) { 1315 ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr); 1316 for (i=0; i<M; i++) { /* for each block in the block row */ 1317 idx_i = *idx + i*bs; 1318 itmp = bs*aj[ai[bn] + i]; 1319 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1320 } 1321 } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1334 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1335 PetscFunctionReturn(0); 1336 } 1337 1338 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1343 { 1344 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1345 Mat C; 1346 PetscErrorCode ierr; 1347 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1348 PetscInt *rows,*cols,bs2=a->bs2; 1349 MatScalar *array; 1350 1351 PetscFunctionBegin; 1352 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1353 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1354 ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr); 1355 1356 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1357 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1358 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1359 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1360 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1361 ierr = PetscFree(col);CHKERRQ(ierr); 1362 } else { 1363 C = *B; 1364 } 1365 1366 array = a->a; 1367 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1368 for (i=0; i<mbs; i++) { 1369 cols[0] = i*bs; 1370 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1371 len = ai[i+1] - ai[i]; 1372 for (j=0; j<len; j++) { 1373 rows[0] = (*aj++)*bs; 1374 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1375 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1376 array += bs2; 1377 } 1378 } 1379 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1380 1381 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1382 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1383 1384 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1385 *B = C; 1386 } else { 1387 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1394 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1395 { 1396 PetscErrorCode ierr; 1397 Mat Btrans; 1398 1399 PetscFunctionBegin; 1400 *f = PETSC_FALSE; 1401 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1402 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1403 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1409 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1410 { 1411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1412 PetscErrorCode ierr; 1413 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1414 int fd; 1415 PetscScalar *aa; 1416 FILE *file; 1417 1418 PetscFunctionBegin; 1419 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1420 ierr = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr); 1421 col_lens[0] = MAT_FILE_CLASSID; 1422 1423 col_lens[1] = A->rmap->N; 1424 col_lens[2] = A->cmap->n; 1425 col_lens[3] = a->nz*bs2; 1426 1427 /* store lengths of each row and write (including header) to file */ 1428 count = 0; 1429 for (i=0; i<a->mbs; i++) { 1430 for (j=0; j<bs; j++) { 1431 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1432 } 1433 } 1434 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1435 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1436 1437 /* store column indices (zero start index) */ 1438 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1439 count = 0; 1440 for (i=0; i<a->mbs; i++) { 1441 for (j=0; j<bs; j++) { 1442 for (k=a->i[i]; k<a->i[i+1]; k++) { 1443 for (l=0; l<bs; l++) { 1444 jj[count++] = bs*a->j[k] + l; 1445 } 1446 } 1447 } 1448 } 1449 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1450 ierr = PetscFree(jj);CHKERRQ(ierr); 1451 1452 /* store nonzero values */ 1453 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1454 count = 0; 1455 for (i=0; i<a->mbs; i++) { 1456 for (j=0; j<bs; j++) { 1457 for (k=a->i[i]; k<a->i[i+1]; k++) { 1458 for (l=0; l<bs; l++) { 1459 aa[count++] = a->a[bs2*k + l*bs + j]; 1460 } 1461 } 1462 } 1463 } 1464 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1465 ierr = PetscFree(aa);CHKERRQ(ierr); 1466 1467 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1468 if (file) { 1469 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1476 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1477 { 1478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1479 PetscErrorCode ierr; 1480 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1481 PetscViewerFormat format; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1485 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1486 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1487 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1488 Mat aij; 1489 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1490 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1491 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1495 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 for (i=0; i<a->mbs; i++) { 1497 for (j=0; j<bs; j++) { 1498 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1499 for (k=a->i[i]; k<a->i[i+1]; k++) { 1500 for (l=0; l<bs; l++) { 1501 #if defined(PETSC_USE_COMPLEX) 1502 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1503 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1504 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1505 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1506 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1507 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1508 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1509 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1510 } 1511 #else 1512 if (a->a[bs2*k + l*bs + j] != 0.0) { 1513 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1514 } 1515 #endif 1516 } 1517 } 1518 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1519 } 1520 } 1521 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1522 } else { 1523 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1524 for (i=0; i<a->mbs; i++) { 1525 for (j=0; j<bs; j++) { 1526 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1527 for (k=a->i[i]; k<a->i[i+1]; k++) { 1528 for (l=0; l<bs; l++) { 1529 #if defined(PETSC_USE_COMPLEX) 1530 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1531 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1532 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1533 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1535 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } else { 1537 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } 1539 #else 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1541 #endif 1542 } 1543 } 1544 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1545 } 1546 } 1547 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1548 } 1549 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1550 PetscFunctionReturn(0); 1551 } 1552 1553 #include <petscdraw.h> 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1556 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1557 { 1558 Mat A = (Mat) Aa; 1559 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1560 PetscErrorCode ierr; 1561 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1562 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1563 MatScalar *aa; 1564 PetscViewer viewer; 1565 PetscViewerFormat format; 1566 1567 PetscFunctionBegin; 1568 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1569 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1570 1571 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1572 1573 /* loop over matrix elements drawing boxes */ 1574 1575 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1576 color = PETSC_DRAW_BLUE; 1577 for (i=0,row=0; i<mbs; i++,row+=bs) { 1578 for (j=a->i[i]; j<a->i[i+1]; j++) { 1579 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1580 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1581 aa = a->a + j*bs2; 1582 for (k=0; k<bs; k++) { 1583 for (l=0; l<bs; l++) { 1584 if (PetscRealPart(*aa++) >= 0.) continue; 1585 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1586 } 1587 } 1588 } 1589 } 1590 color = PETSC_DRAW_CYAN; 1591 for (i=0,row=0; i<mbs; i++,row+=bs) { 1592 for (j=a->i[i]; j<a->i[i+1]; j++) { 1593 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1594 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1595 aa = a->a + j*bs2; 1596 for (k=0; k<bs; k++) { 1597 for (l=0; l<bs; l++) { 1598 if (PetscRealPart(*aa++) != 0.) continue; 1599 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1600 } 1601 } 1602 } 1603 } 1604 color = PETSC_DRAW_RED; 1605 for (i=0,row=0; i<mbs; i++,row+=bs) { 1606 for (j=a->i[i]; j<a->i[i+1]; j++) { 1607 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1608 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1609 aa = a->a + j*bs2; 1610 for (k=0; k<bs; k++) { 1611 for (l=0; l<bs; l++) { 1612 if (PetscRealPart(*aa++) <= 0.) continue; 1613 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1614 } 1615 } 1616 } 1617 } 1618 } else { 1619 /* use contour shading to indicate magnitude of values */ 1620 /* first determine max of all nonzero values */ 1621 PetscDraw popup; 1622 PetscReal scale,maxv = 0.0; 1623 1624 for (i=0; i<a->nz*a->bs2; i++) { 1625 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1626 } 1627 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1628 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1629 if (popup) { 1630 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1631 } 1632 for (i=0,row=0; i<mbs; i++,row+=bs) { 1633 for (j=a->i[i]; j<a->i[i+1]; j++) { 1634 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1635 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1636 aa = a->a + j*bs2; 1637 for (k=0; k<bs; k++) { 1638 for (l=0; l<bs; l++) { 1639 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1640 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1641 } 1642 } 1643 } 1644 } 1645 } 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1651 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1652 { 1653 PetscErrorCode ierr; 1654 PetscReal xl,yl,xr,yr,w,h; 1655 PetscDraw draw; 1656 PetscBool isnull; 1657 1658 PetscFunctionBegin; 1659 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1660 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1661 1662 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1663 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1664 xr += w; yr += h; xl = -w; yl = -h; 1665 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1666 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1667 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatView_SeqBAIJ" 1673 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1674 { 1675 PetscErrorCode ierr; 1676 PetscBool iascii,isbinary,isdraw; 1677 1678 PetscFunctionBegin; 1679 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1680 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1681 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1682 if (iascii) { 1683 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1684 } else if (isbinary) { 1685 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1686 } else if (isdraw) { 1687 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1688 } else { 1689 Mat B; 1690 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1691 ierr = MatView(B,viewer);CHKERRQ(ierr); 1692 ierr = MatDestroy(&B);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1700 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1701 { 1702 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1703 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1704 PetscInt *ai = a->i,*ailen = a->ilen; 1705 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1706 MatScalar *ap,*aa = a->a; 1707 1708 PetscFunctionBegin; 1709 for (k=0; k<m; k++) { /* loop over rows */ 1710 row = im[k]; brow = row/bs; 1711 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1712 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1713 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1714 nrow = ailen[brow]; 1715 for (l=0; l<n; l++) { /* loop over columns */ 1716 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1717 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1718 col = in[l]; 1719 bcol = col/bs; 1720 cidx = col%bs; 1721 ridx = row%bs; 1722 high = nrow; 1723 low = 0; /* assume unsorted */ 1724 while (high-low > 5) { 1725 t = (low+high)/2; 1726 if (rp[t] > bcol) high = t; 1727 else low = t; 1728 } 1729 for (i=low; i<high; i++) { 1730 if (rp[i] > bcol) break; 1731 if (rp[i] == bcol) { 1732 *v++ = ap[bs2*i+bs*cidx+ridx]; 1733 goto finished; 1734 } 1735 } 1736 *v++ = 0.0; 1737 finished:; 1738 } 1739 } 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1745 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1746 { 1747 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1748 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1749 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1750 PetscErrorCode ierr; 1751 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1752 PetscBool roworiented=a->roworiented; 1753 const PetscScalar *value = v; 1754 MatScalar *ap,*aa = a->a,*bap; 1755 1756 PetscFunctionBegin; 1757 if (roworiented) { 1758 stepval = (n-1)*bs; 1759 } else { 1760 stepval = (m-1)*bs; 1761 } 1762 for (k=0; k<m; k++) { /* loop over added rows */ 1763 row = im[k]; 1764 if (row < 0) continue; 1765 #if defined(PETSC_USE_DEBUG) 1766 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1767 #endif 1768 rp = aj + ai[row]; 1769 ap = aa + bs2*ai[row]; 1770 rmax = imax[row]; 1771 nrow = ailen[row]; 1772 low = 0; 1773 high = nrow; 1774 for (l=0; l<n; l++) { /* loop over added columns */ 1775 if (in[l] < 0) continue; 1776 #if defined(PETSC_USE_DEBUG) 1777 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1778 #endif 1779 col = in[l]; 1780 if (roworiented) { 1781 value = v + (k*(stepval+bs) + l)*bs; 1782 } else { 1783 value = v + (l*(stepval+bs) + k)*bs; 1784 } 1785 if (col <= lastcol) low = 0; 1786 else high = nrow; 1787 lastcol = col; 1788 while (high-low > 7) { 1789 t = (low+high)/2; 1790 if (rp[t] > col) high = t; 1791 else low = t; 1792 } 1793 for (i=low; i<high; i++) { 1794 if (rp[i] > col) break; 1795 if (rp[i] == col) { 1796 bap = ap + bs2*i; 1797 if (roworiented) { 1798 if (is == ADD_VALUES) { 1799 for (ii=0; ii<bs; ii++,value+=stepval) { 1800 for (jj=ii; jj<bs2; jj+=bs) { 1801 bap[jj] += *value++; 1802 } 1803 } 1804 } else { 1805 for (ii=0; ii<bs; ii++,value+=stepval) { 1806 for (jj=ii; jj<bs2; jj+=bs) { 1807 bap[jj] = *value++; 1808 } 1809 } 1810 } 1811 } else { 1812 if (is == ADD_VALUES) { 1813 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1814 for (jj=0; jj<bs; jj++) { 1815 bap[jj] += value[jj]; 1816 } 1817 bap += bs; 1818 } 1819 } else { 1820 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1821 for (jj=0; jj<bs; jj++) { 1822 bap[jj] = value[jj]; 1823 } 1824 bap += bs; 1825 } 1826 } 1827 } 1828 goto noinsert2; 1829 } 1830 } 1831 if (nonew == 1) goto noinsert2; 1832 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1833 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1834 N = nrow++ - 1; high++; 1835 /* shift up all the later entries in this row */ 1836 for (ii=N; ii>=i; ii--) { 1837 rp[ii+1] = rp[ii]; 1838 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1839 } 1840 if (N >= i) { 1841 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1842 } 1843 rp[i] = col; 1844 bap = ap + bs2*i; 1845 if (roworiented) { 1846 for (ii=0; ii<bs; ii++,value+=stepval) { 1847 for (jj=ii; jj<bs2; jj+=bs) { 1848 bap[jj] = *value++; 1849 } 1850 } 1851 } else { 1852 for (ii=0; ii<bs; ii++,value+=stepval) { 1853 for (jj=0; jj<bs; jj++) { 1854 *bap++ = *value++; 1855 } 1856 } 1857 } 1858 noinsert2:; 1859 low = i; 1860 } 1861 ailen[row] = nrow; 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1868 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1869 { 1870 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1871 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1872 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1873 PetscErrorCode ierr; 1874 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1875 MatScalar *aa = a->a,*ap; 1876 PetscReal ratio=0.6; 1877 1878 PetscFunctionBegin; 1879 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1880 1881 if (m) rmax = ailen[0]; 1882 for (i=1; i<mbs; i++) { 1883 /* move each row back by the amount of empty slots (fshift) before it*/ 1884 fshift += imax[i-1] - ailen[i-1]; 1885 rmax = PetscMax(rmax,ailen[i]); 1886 if (fshift) { 1887 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1888 N = ailen[i]; 1889 for (j=0; j<N; j++) { 1890 ip[j-fshift] = ip[j]; 1891 1892 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1893 } 1894 } 1895 ai[i] = ai[i-1] + ailen[i-1]; 1896 } 1897 if (mbs) { 1898 fshift += imax[mbs-1] - ailen[mbs-1]; 1899 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1900 } 1901 1902 /* reset ilen and imax for each row */ 1903 a->nonzerorowcnt = 0; 1904 for (i=0; i<mbs; i++) { 1905 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1906 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1907 } 1908 a->nz = ai[mbs]; 1909 1910 /* diagonals may have moved, so kill the diagonal pointers */ 1911 a->idiagvalid = PETSC_FALSE; 1912 if (fshift && a->diag) { 1913 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1914 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1915 a->diag = 0; 1916 } 1917 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1918 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1919 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1920 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1921 1922 A->info.mallocs += a->reallocs; 1923 a->reallocs = 0; 1924 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1925 1926 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 /* 1931 This function returns an array of flags which indicate the locations of contiguous 1932 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1933 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1934 Assume: sizes should be long enough to hold all the values. 1935 */ 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1938 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1939 { 1940 PetscInt i,j,k,row; 1941 PetscBool flg; 1942 1943 PetscFunctionBegin; 1944 for (i=0,j=0; i<n; j++) { 1945 row = idx[i]; 1946 if (row%bs!=0) { /* Not the begining of a block */ 1947 sizes[j] = 1; 1948 i++; 1949 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1950 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1951 i++; 1952 } else { /* Begining of the block, so check if the complete block exists */ 1953 flg = PETSC_TRUE; 1954 for (k=1; k<bs; k++) { 1955 if (row+k != idx[i+k]) { /* break in the block */ 1956 flg = PETSC_FALSE; 1957 break; 1958 } 1959 } 1960 if (flg) { /* No break in the bs */ 1961 sizes[j] = bs; 1962 i += bs; 1963 } else { 1964 sizes[j] = 1; 1965 i++; 1966 } 1967 } 1968 } 1969 *bs_max = j; 1970 PetscFunctionReturn(0); 1971 } 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1975 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1976 { 1977 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1978 PetscErrorCode ierr; 1979 PetscInt i,j,k,count,*rows; 1980 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1981 PetscScalar zero = 0.0; 1982 MatScalar *aa; 1983 const PetscScalar *xx; 1984 PetscScalar *bb; 1985 1986 PetscFunctionBegin; 1987 /* fix right hand side if needed */ 1988 if (x && b) { 1989 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1990 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1991 for (i=0; i<is_n; i++) { 1992 bb[is_idx[i]] = diag*xx[is_idx[i]]; 1993 } 1994 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1995 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1996 } 1997 1998 /* Make a copy of the IS and sort it */ 1999 /* allocate memory for rows,sizes */ 2000 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2001 2002 /* copy IS values to rows, and sort them */ 2003 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2004 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2005 2006 if (baij->keepnonzeropattern) { 2007 for (i=0; i<is_n; i++) sizes[i] = 1; 2008 bs_max = is_n; 2009 } else { 2010 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2011 A->nonzerostate++; 2012 } 2013 2014 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2015 row = rows[j]; 2016 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2017 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2018 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2019 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2020 if (diag != (PetscScalar)0.0) { 2021 if (baij->ilen[row/bs] > 0) { 2022 baij->ilen[row/bs] = 1; 2023 baij->j[baij->i[row/bs]] = row/bs; 2024 2025 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2026 } 2027 /* Now insert all the diagonal values for this bs */ 2028 for (k=0; k<bs; k++) { 2029 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2030 } 2031 } else { /* (diag == 0.0) */ 2032 baij->ilen[row/bs] = 0; 2033 } /* end (diag == 0.0) */ 2034 } else { /* (sizes[i] != bs) */ 2035 #if defined(PETSC_USE_DEBUG) 2036 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2037 #endif 2038 for (k=0; k<count; k++) { 2039 aa[0] = zero; 2040 aa += bs; 2041 } 2042 if (diag != (PetscScalar)0.0) { 2043 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2044 } 2045 } 2046 } 2047 2048 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2049 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2055 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2056 { 2057 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2058 PetscErrorCode ierr; 2059 PetscInt i,j,k,count; 2060 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2061 PetscScalar zero = 0.0; 2062 MatScalar *aa; 2063 const PetscScalar *xx; 2064 PetscScalar *bb; 2065 PetscBool *zeroed,vecs = PETSC_FALSE; 2066 2067 PetscFunctionBegin; 2068 /* fix right hand side if needed */ 2069 if (x && b) { 2070 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2071 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2072 vecs = PETSC_TRUE; 2073 } 2074 2075 /* zero the columns */ 2076 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2077 for (i=0; i<is_n; i++) { 2078 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2079 zeroed[is_idx[i]] = PETSC_TRUE; 2080 } 2081 for (i=0; i<A->rmap->N; i++) { 2082 if (!zeroed[i]) { 2083 row = i/bs; 2084 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2085 for (k=0; k<bs; k++) { 2086 col = bs*baij->j[j] + k; 2087 if (zeroed[col]) { 2088 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2089 if (vecs) bb[i] -= aa[0]*xx[col]; 2090 aa[0] = 0.0; 2091 } 2092 } 2093 } 2094 } else if (vecs) bb[i] = diag*xx[i]; 2095 } 2096 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2097 if (vecs) { 2098 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2099 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2100 } 2101 2102 /* zero the rows */ 2103 for (i=0; i<is_n; i++) { 2104 row = is_idx[i]; 2105 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2106 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2107 for (k=0; k<count; k++) { 2108 aa[0] = zero; 2109 aa += bs; 2110 } 2111 if (diag != (PetscScalar)0.0) { 2112 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2113 } 2114 } 2115 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2121 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2122 { 2123 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2124 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2125 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2126 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2127 PetscErrorCode ierr; 2128 PetscInt ridx,cidx,bs2=a->bs2; 2129 PetscBool roworiented=a->roworiented; 2130 MatScalar *ap,value,*aa=a->a,*bap; 2131 2132 PetscFunctionBegin; 2133 if (v) PetscValidScalarPointer(v,6); 2134 for (k=0; k<m; k++) { /* loop over added rows */ 2135 row = im[k]; 2136 brow = row/bs; 2137 if (row < 0) continue; 2138 #if defined(PETSC_USE_DEBUG) 2139 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2140 #endif 2141 rp = aj + ai[brow]; 2142 ap = aa + bs2*ai[brow]; 2143 rmax = imax[brow]; 2144 nrow = ailen[brow]; 2145 low = 0; 2146 high = nrow; 2147 for (l=0; l<n; l++) { /* loop over added columns */ 2148 if (in[l] < 0) continue; 2149 #if defined(PETSC_USE_DEBUG) 2150 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2151 #endif 2152 col = in[l]; bcol = col/bs; 2153 ridx = row % bs; cidx = col % bs; 2154 if (roworiented) { 2155 value = v[l + k*n]; 2156 } else { 2157 value = v[k + l*m]; 2158 } 2159 if (col <= lastcol) low = 0; else high = nrow; 2160 lastcol = col; 2161 while (high-low > 7) { 2162 t = (low+high)/2; 2163 if (rp[t] > bcol) high = t; 2164 else low = t; 2165 } 2166 for (i=low; i<high; i++) { 2167 if (rp[i] > bcol) break; 2168 if (rp[i] == bcol) { 2169 bap = ap + bs2*i + bs*cidx + ridx; 2170 if (is == ADD_VALUES) *bap += value; 2171 else *bap = value; 2172 goto noinsert1; 2173 } 2174 } 2175 if (nonew == 1) goto noinsert1; 2176 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2177 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2178 N = nrow++ - 1; high++; 2179 /* shift up all the later entries in this row */ 2180 for (ii=N; ii>=i; ii--) { 2181 rp[ii+1] = rp[ii]; 2182 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2183 } 2184 if (N>=i) { 2185 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2186 } 2187 rp[i] = bcol; 2188 ap[bs2*i + bs*cidx + ridx] = value; 2189 a->nz++; 2190 A->nonzerostate++; 2191 noinsert1:; 2192 low = i; 2193 } 2194 ailen[brow] = nrow; 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2201 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2202 { 2203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2204 Mat outA; 2205 PetscErrorCode ierr; 2206 PetscBool row_identity,col_identity; 2207 2208 PetscFunctionBegin; 2209 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2210 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2211 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2212 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2213 2214 outA = inA; 2215 inA->factortype = MAT_FACTOR_LU; 2216 2217 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2218 2219 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2220 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2221 a->row = row; 2222 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2223 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2224 a->col = col; 2225 2226 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2227 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2228 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2229 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2230 2231 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2232 if (!a->solve_work) { 2233 ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr); 2234 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2235 } 2236 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2242 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2243 { 2244 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2245 PetscInt i,nz,mbs; 2246 2247 PetscFunctionBegin; 2248 nz = baij->maxnz; 2249 mbs = baij->mbs; 2250 for (i=0; i<nz; i++) { 2251 baij->j[i] = indices[i]; 2252 } 2253 baij->nz = nz; 2254 for (i=0; i<mbs; i++) { 2255 baij->ilen[i] = baij->imax[i]; 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2262 /*@ 2263 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2264 in the matrix. 2265 2266 Input Parameters: 2267 + mat - the SeqBAIJ matrix 2268 - indices - the column indices 2269 2270 Level: advanced 2271 2272 Notes: 2273 This can be called if you have precomputed the nonzero structure of the 2274 matrix and want to provide it to the matrix object to improve the performance 2275 of the MatSetValues() operation. 2276 2277 You MUST have set the correct numbers of nonzeros per row in the call to 2278 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2279 2280 MUST be called before any calls to MatSetValues(); 2281 2282 @*/ 2283 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2284 { 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2289 PetscValidPointer(indices,2); 2290 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2296 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2297 { 2298 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2299 PetscErrorCode ierr; 2300 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2301 PetscReal atmp; 2302 PetscScalar *x,zero = 0.0; 2303 MatScalar *aa; 2304 PetscInt ncols,brow,krow,kcol; 2305 2306 PetscFunctionBegin; 2307 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2308 bs = A->rmap->bs; 2309 aa = a->a; 2310 ai = a->i; 2311 aj = a->j; 2312 mbs = a->mbs; 2313 2314 ierr = VecSet(v,zero);CHKERRQ(ierr); 2315 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2316 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2317 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2318 for (i=0; i<mbs; i++) { 2319 ncols = ai[1] - ai[0]; ai++; 2320 brow = bs*i; 2321 for (j=0; j<ncols; j++) { 2322 for (kcol=0; kcol<bs; kcol++) { 2323 for (krow=0; krow<bs; krow++) { 2324 atmp = PetscAbsScalar(*aa);aa++; 2325 row = brow + krow; /* row index */ 2326 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2327 } 2328 } 2329 aj++; 2330 } 2331 } 2332 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2333 PetscFunctionReturn(0); 2334 } 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatCopy_SeqBAIJ" 2338 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2339 { 2340 PetscErrorCode ierr; 2341 2342 PetscFunctionBegin; 2343 /* If the two matrices have the same copy implementation, use fast copy. */ 2344 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2346 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2347 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2348 2349 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2350 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2351 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2352 } else { 2353 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2354 } 2355 PetscFunctionReturn(0); 2356 } 2357 2358 #undef __FUNCT__ 2359 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2360 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2361 { 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2371 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2372 { 2373 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2374 2375 PetscFunctionBegin; 2376 *array = a->a; 2377 PetscFunctionReturn(0); 2378 } 2379 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2382 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2383 { 2384 PetscFunctionBegin; 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2390 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2391 { 2392 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2393 PetscErrorCode ierr; 2394 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2395 PetscBLASInt one=1; 2396 2397 PetscFunctionBegin; 2398 if (str == SAME_NONZERO_PATTERN) { 2399 PetscScalar alpha = a; 2400 PetscBLASInt bnz; 2401 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2402 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2403 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2404 if (y->xtoy && y->XtoY != X) { 2405 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2406 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2407 } 2408 if (!y->xtoy) { /* get xtoy */ 2409 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2410 y->XtoY = X; 2411 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2412 } 2413 for (i=0; i<x->nz; i++) { 2414 j = 0; 2415 while (j < bs2) { 2416 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2417 j++; 2418 } 2419 } 2420 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(double)((PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2421 } else { 2422 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2423 } 2424 PetscFunctionReturn(0); 2425 } 2426 2427 #undef __FUNCT__ 2428 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2429 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2430 { 2431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2432 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2433 MatScalar *aa = a->a; 2434 2435 PetscFunctionBegin; 2436 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2442 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2443 { 2444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2445 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2446 MatScalar *aa = a->a; 2447 2448 PetscFunctionBegin; 2449 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2455 /* 2456 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2457 */ 2458 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2459 { 2460 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2461 PetscErrorCode ierr; 2462 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2463 PetscInt nz = a->i[m],row,*jj,mr,col; 2464 2465 PetscFunctionBegin; 2466 *nn = n; 2467 if (!ia) PetscFunctionReturn(0); 2468 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2469 else { 2470 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2471 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2472 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2473 jj = a->j; 2474 for (i=0; i<nz; i++) { 2475 collengths[jj[i]]++; 2476 } 2477 cia[0] = oshift; 2478 for (i=0; i<n; i++) { 2479 cia[i+1] = cia[i] + collengths[i]; 2480 } 2481 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2482 jj = a->j; 2483 for (row=0; row<m; row++) { 2484 mr = a->i[row+1] - a->i[row]; 2485 for (i=0; i<mr; i++) { 2486 col = *jj++; 2487 2488 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2489 } 2490 } 2491 ierr = PetscFree(collengths);CHKERRQ(ierr); 2492 *ia = cia; *ja = cja; 2493 } 2494 PetscFunctionReturn(0); 2495 } 2496 2497 #undef __FUNCT__ 2498 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2499 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2500 { 2501 PetscErrorCode ierr; 2502 2503 PetscFunctionBegin; 2504 if (!ia) PetscFunctionReturn(0); 2505 ierr = PetscFree(*ia);CHKERRQ(ierr); 2506 ierr = PetscFree(*ja);CHKERRQ(ierr); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 /* 2511 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2512 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2513 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2514 */ 2515 #undef __FUNCT__ 2516 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2517 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2518 { 2519 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2520 PetscErrorCode ierr; 2521 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2522 PetscInt nz = a->i[m],row,*jj,mr,col; 2523 PetscInt *cspidx; 2524 2525 PetscFunctionBegin; 2526 *nn = n; 2527 if (!ia) PetscFunctionReturn(0); 2528 2529 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2530 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2531 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2532 ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr); 2533 jj = a->j; 2534 for (i=0; i<nz; i++) { 2535 collengths[jj[i]]++; 2536 } 2537 cia[0] = oshift; 2538 for (i=0; i<n; i++) { 2539 cia[i+1] = cia[i] + collengths[i]; 2540 } 2541 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2542 jj = a->j; 2543 for (row=0; row<m; row++) { 2544 mr = a->i[row+1] - a->i[row]; 2545 for (i=0; i<mr; i++) { 2546 col = *jj++; 2547 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2548 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2549 } 2550 } 2551 ierr = PetscFree(collengths);CHKERRQ(ierr); 2552 *ia = cia; *ja = cja; 2553 *spidx = cspidx; 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2559 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2560 { 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2565 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 /* -------------------------------------------------------------------*/ 2570 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2571 MatGetRow_SeqBAIJ, 2572 MatRestoreRow_SeqBAIJ, 2573 MatMult_SeqBAIJ_N, 2574 /* 4*/ MatMultAdd_SeqBAIJ_N, 2575 MatMultTranspose_SeqBAIJ, 2576 MatMultTransposeAdd_SeqBAIJ, 2577 0, 2578 0, 2579 0, 2580 /* 10*/ 0, 2581 MatLUFactor_SeqBAIJ, 2582 0, 2583 0, 2584 MatTranspose_SeqBAIJ, 2585 /* 15*/ MatGetInfo_SeqBAIJ, 2586 MatEqual_SeqBAIJ, 2587 MatGetDiagonal_SeqBAIJ, 2588 MatDiagonalScale_SeqBAIJ, 2589 MatNorm_SeqBAIJ, 2590 /* 20*/ 0, 2591 MatAssemblyEnd_SeqBAIJ, 2592 MatSetOption_SeqBAIJ, 2593 MatZeroEntries_SeqBAIJ, 2594 /* 24*/ MatZeroRows_SeqBAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /* 29*/ MatSetUp_SeqBAIJ, 2600 0, 2601 0, 2602 0, 2603 0, 2604 /* 34*/ MatDuplicate_SeqBAIJ, 2605 0, 2606 0, 2607 MatILUFactor_SeqBAIJ, 2608 0, 2609 /* 39*/ MatAXPY_SeqBAIJ, 2610 MatGetSubMatrices_SeqBAIJ, 2611 MatIncreaseOverlap_SeqBAIJ, 2612 MatGetValues_SeqBAIJ, 2613 MatCopy_SeqBAIJ, 2614 /* 44*/ 0, 2615 MatScale_SeqBAIJ, 2616 0, 2617 0, 2618 MatZeroRowsColumns_SeqBAIJ, 2619 /* 49*/ 0, 2620 MatGetRowIJ_SeqBAIJ, 2621 MatRestoreRowIJ_SeqBAIJ, 2622 MatGetColumnIJ_SeqBAIJ, 2623 MatRestoreColumnIJ_SeqBAIJ, 2624 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2625 0, 2626 0, 2627 0, 2628 MatSetValuesBlocked_SeqBAIJ, 2629 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2630 MatDestroy_SeqBAIJ, 2631 MatView_SeqBAIJ, 2632 0, 2633 0, 2634 /* 64*/ 0, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2640 0, 2641 MatConvert_Basic, 2642 0, 2643 0, 2644 /* 74*/ 0, 2645 MatFDColoringApply_BAIJ, 2646 0, 2647 0, 2648 0, 2649 /* 79*/ 0, 2650 0, 2651 0, 2652 0, 2653 MatLoad_SeqBAIJ, 2654 /* 84*/ 0, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /* 89*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /* 94*/ 0, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /* 99*/ 0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*104*/ 0, 2675 MatRealPart_SeqBAIJ, 2676 MatImaginaryPart_SeqBAIJ, 2677 0, 2678 0, 2679 /*109*/ 0, 2680 0, 2681 0, 2682 0, 2683 MatMissingDiagonal_SeqBAIJ, 2684 /*114*/ 0, 2685 0, 2686 0, 2687 0, 2688 0, 2689 /*119*/ 0, 2690 0, 2691 MatMultHermitianTranspose_SeqBAIJ, 2692 MatMultHermitianTransposeAdd_SeqBAIJ, 2693 0, 2694 /*124*/ 0, 2695 0, 2696 MatInvertBlockDiagonal_SeqBAIJ, 2697 0, 2698 0, 2699 /*129*/ 0, 2700 0, 2701 0, 2702 0, 2703 0, 2704 /*134*/ 0, 2705 0, 2706 0, 2707 0, 2708 0, 2709 /*139*/ 0, 2710 0, 2711 0, 2712 MatFDColoringSetUp_SeqXAIJ 2713 }; 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2717 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2718 { 2719 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2720 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2721 PetscErrorCode ierr; 2722 2723 PetscFunctionBegin; 2724 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2725 2726 /* allocate space for values if not already there */ 2727 if (!aij->saved_values) { 2728 ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr); 2729 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2730 } 2731 2732 /* copy values over */ 2733 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2739 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2740 { 2741 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2742 PetscErrorCode ierr; 2743 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2744 2745 PetscFunctionBegin; 2746 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2747 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2748 2749 /* copy values over */ 2750 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2751 PetscFunctionReturn(0); 2752 } 2753 2754 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2755 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2756 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2759 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2760 { 2761 Mat_SeqBAIJ *b; 2762 PetscErrorCode ierr; 2763 PetscInt i,mbs,nbs,bs2; 2764 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2765 2766 PetscFunctionBegin; 2767 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2768 if (nz == MAT_SKIP_ALLOCATION) { 2769 skipallocation = PETSC_TRUE; 2770 nz = 0; 2771 } 2772 2773 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2774 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2775 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2776 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2777 2778 B->preallocated = PETSC_TRUE; 2779 2780 mbs = B->rmap->n/bs; 2781 nbs = B->cmap->n/bs; 2782 bs2 = bs*bs; 2783 2784 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2785 2786 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2787 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2788 if (nnz) { 2789 for (i=0; i<mbs; i++) { 2790 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2791 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2792 } 2793 } 2794 2795 b = (Mat_SeqBAIJ*)B->data; 2796 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2797 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2798 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2799 2800 if (!flg) { 2801 switch (bs) { 2802 case 1: 2803 B->ops->mult = MatMult_SeqBAIJ_1; 2804 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2805 break; 2806 case 2: 2807 B->ops->mult = MatMult_SeqBAIJ_2; 2808 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2809 break; 2810 case 3: 2811 B->ops->mult = MatMult_SeqBAIJ_3; 2812 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2813 break; 2814 case 4: 2815 B->ops->mult = MatMult_SeqBAIJ_4; 2816 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2817 break; 2818 case 5: 2819 B->ops->mult = MatMult_SeqBAIJ_5; 2820 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2821 break; 2822 case 6: 2823 B->ops->mult = MatMult_SeqBAIJ_6; 2824 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2825 break; 2826 case 7: 2827 B->ops->mult = MatMult_SeqBAIJ_7; 2828 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2829 break; 2830 case 15: 2831 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2832 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2833 break; 2834 default: 2835 B->ops->mult = MatMult_SeqBAIJ_N; 2836 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2837 break; 2838 } 2839 } 2840 B->ops->sor = MatSOR_SeqBAIJ; 2841 b->mbs = mbs; 2842 b->nbs = nbs; 2843 if (!skipallocation) { 2844 if (!b->imax) { 2845 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2846 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2847 2848 b->free_imax_ilen = PETSC_TRUE; 2849 } 2850 /* b->ilen will count nonzeros in each block row so far. */ 2851 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2852 if (!nnz) { 2853 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2854 else if (nz < 0) nz = 1; 2855 for (i=0; i<mbs; i++) b->imax[i] = nz; 2856 nz = nz*mbs; 2857 } else { 2858 nz = 0; 2859 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2860 } 2861 2862 /* allocate the matrix space */ 2863 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2864 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2865 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2866 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2867 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2868 2869 b->singlemalloc = PETSC_TRUE; 2870 b->i[0] = 0; 2871 for (i=1; i<mbs+1; i++) { 2872 b->i[i] = b->i[i-1] + b->imax[i-1]; 2873 } 2874 b->free_a = PETSC_TRUE; 2875 b->free_ij = PETSC_TRUE; 2876 #if defined(PETSC_THREADCOMM_ACTIVE) 2877 ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr); 2878 #endif 2879 } else { 2880 b->free_a = PETSC_FALSE; 2881 b->free_ij = PETSC_FALSE; 2882 } 2883 2884 b->bs2 = bs2; 2885 b->mbs = mbs; 2886 b->nz = 0; 2887 b->maxnz = nz; 2888 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2889 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2895 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2896 { 2897 PetscInt i,m,nz,nz_max=0,*nnz; 2898 PetscScalar *values=0; 2899 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2900 PetscErrorCode ierr; 2901 2902 PetscFunctionBegin; 2903 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2904 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2905 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2906 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2907 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2908 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2909 m = B->rmap->n/bs; 2910 2911 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2912 ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr); 2913 for (i=0; i<m; i++) { 2914 nz = ii[i+1]- ii[i]; 2915 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2916 nz_max = PetscMax(nz_max, nz); 2917 nnz[i] = nz; 2918 } 2919 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2920 ierr = PetscFree(nnz);CHKERRQ(ierr); 2921 2922 values = (PetscScalar*)V; 2923 if (!values) { 2924 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2925 } 2926 for (i=0; i<m; i++) { 2927 PetscInt ncols = ii[i+1] - ii[i]; 2928 const PetscInt *icols = jj + ii[i]; 2929 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2930 if (!roworiented) { 2931 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2932 } else { 2933 PetscInt j; 2934 for (j=0; j<ncols; j++) { 2935 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2936 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2937 } 2938 } 2939 } 2940 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2941 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2942 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2944 PetscFunctionReturn(0); 2945 } 2946 2947 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2948 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 2949 #if defined(PETSC_HAVE_MUMPS) 2950 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2951 #endif 2952 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 2953 2954 /*MC 2955 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2956 block sparse compressed row format. 2957 2958 Options Database Keys: 2959 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2960 2961 Level: beginner 2962 2963 .seealso: MatCreateSeqBAIJ() 2964 M*/ 2965 2966 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatCreate_SeqBAIJ" 2970 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2971 { 2972 PetscErrorCode ierr; 2973 PetscMPIInt size; 2974 Mat_SeqBAIJ *b; 2975 2976 PetscFunctionBegin; 2977 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2978 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2979 2980 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2981 B->data = (void*)b; 2982 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2983 2984 b->row = 0; 2985 b->col = 0; 2986 b->icol = 0; 2987 b->reallocs = 0; 2988 b->saved_values = 0; 2989 2990 b->roworiented = PETSC_TRUE; 2991 b->nonew = 0; 2992 b->diag = 0; 2993 b->solve_work = 0; 2994 b->mult_work = 0; 2995 B->spptr = 0; 2996 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 2997 b->keepnonzeropattern = PETSC_FALSE; 2998 b->xtoy = 0; 2999 b->XtoY = 0; 3000 3001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3004 #if defined(PETSC_HAVE_MUMPS) 3005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3006 #endif 3007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3014 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3015 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3016 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3017 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3018 PetscFunctionReturn(0); 3019 } 3020 3021 #undef __FUNCT__ 3022 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3023 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3024 { 3025 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3026 PetscErrorCode ierr; 3027 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3028 3029 PetscFunctionBegin; 3030 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3031 3032 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3033 c->imax = a->imax; 3034 c->ilen = a->ilen; 3035 c->free_imax_ilen = PETSC_FALSE; 3036 } else { 3037 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3038 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3039 for (i=0; i<mbs; i++) { 3040 c->imax[i] = a->imax[i]; 3041 c->ilen[i] = a->ilen[i]; 3042 } 3043 c->free_imax_ilen = PETSC_TRUE; 3044 } 3045 3046 /* allocate the matrix space */ 3047 if (mallocmatspace) { 3048 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3049 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3050 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3051 3052 c->i = a->i; 3053 c->j = a->j; 3054 c->singlemalloc = PETSC_FALSE; 3055 c->free_a = PETSC_TRUE; 3056 c->free_ij = PETSC_FALSE; 3057 c->parent = A; 3058 C->preallocated = PETSC_TRUE; 3059 C->assembled = PETSC_TRUE; 3060 3061 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3062 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3063 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3064 } else { 3065 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3066 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3067 3068 c->singlemalloc = PETSC_TRUE; 3069 c->free_a = PETSC_TRUE; 3070 c->free_ij = PETSC_TRUE; 3071 3072 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3073 if (mbs > 0) { 3074 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3075 if (cpvalues == MAT_COPY_VALUES) { 3076 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3077 } else { 3078 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3079 } 3080 } 3081 C->preallocated = PETSC_TRUE; 3082 C->assembled = PETSC_TRUE; 3083 } 3084 } 3085 3086 c->roworiented = a->roworiented; 3087 c->nonew = a->nonew; 3088 3089 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3090 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3091 3092 c->bs2 = a->bs2; 3093 c->mbs = a->mbs; 3094 c->nbs = a->nbs; 3095 3096 if (a->diag) { 3097 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3098 c->diag = a->diag; 3099 c->free_diag = PETSC_FALSE; 3100 } else { 3101 ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr); 3102 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3103 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3104 c->free_diag = PETSC_TRUE; 3105 } 3106 } else c->diag = 0; 3107 3108 c->nz = a->nz; 3109 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3110 c->solve_work = 0; 3111 c->mult_work = 0; 3112 3113 c->compressedrow.use = a->compressedrow.use; 3114 c->compressedrow.nrows = a->compressedrow.nrows; 3115 if (a->compressedrow.use) { 3116 i = a->compressedrow.nrows; 3117 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3118 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3119 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3120 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3121 } else { 3122 c->compressedrow.use = PETSC_FALSE; 3123 c->compressedrow.i = NULL; 3124 c->compressedrow.rindex = NULL; 3125 } 3126 C->nonzerostate = A->nonzerostate; 3127 3128 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3129 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3130 PetscFunctionReturn(0); 3131 } 3132 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3135 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3136 { 3137 PetscErrorCode ierr; 3138 3139 PetscFunctionBegin; 3140 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3141 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3142 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3143 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3144 PetscFunctionReturn(0); 3145 } 3146 3147 #undef __FUNCT__ 3148 #define __FUNCT__ "MatLoad_SeqBAIJ" 3149 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3150 { 3151 Mat_SeqBAIJ *a; 3152 PetscErrorCode ierr; 3153 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3154 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3155 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3156 PetscInt *masked,nmask,tmp,bs2,ishift; 3157 PetscMPIInt size; 3158 int fd; 3159 PetscScalar *aa; 3160 MPI_Comm comm; 3161 3162 PetscFunctionBegin; 3163 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3164 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3165 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3166 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3167 bs2 = bs*bs; 3168 3169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3170 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3171 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3172 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3173 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3174 M = header[1]; N = header[2]; nz = header[3]; 3175 3176 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3177 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3178 3179 /* 3180 This code adds extra rows to make sure the number of rows is 3181 divisible by the blocksize 3182 */ 3183 mbs = M/bs; 3184 extra_rows = bs - M + bs*(mbs); 3185 if (extra_rows == bs) extra_rows = 0; 3186 else mbs++; 3187 if (extra_rows) { 3188 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3189 } 3190 3191 /* Set global sizes if not already set */ 3192 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3193 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3194 } else { /* Check if the matrix global sizes are correct */ 3195 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3196 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3197 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3198 } 3199 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3200 } 3201 3202 /* read in row lengths */ 3203 ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr); 3204 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3205 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3206 3207 /* read in column indices */ 3208 ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr); 3209 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3210 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3211 3212 /* loop over row lengths determining block row lengths */ 3213 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3214 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3215 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3216 rowcount = 0; 3217 nzcount = 0; 3218 for (i=0; i<mbs; i++) { 3219 nmask = 0; 3220 for (j=0; j<bs; j++) { 3221 kmax = rowlengths[rowcount]; 3222 for (k=0; k<kmax; k++) { 3223 tmp = jj[nzcount++]/bs; 3224 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3225 } 3226 rowcount++; 3227 } 3228 browlengths[i] += nmask; 3229 /* zero out the mask elements we set */ 3230 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3231 } 3232 3233 /* Do preallocation */ 3234 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3235 a = (Mat_SeqBAIJ*)newmat->data; 3236 3237 /* set matrix "i" values */ 3238 a->i[0] = 0; 3239 for (i=1; i<= mbs; i++) { 3240 a->i[i] = a->i[i-1] + browlengths[i-1]; 3241 a->ilen[i-1] = browlengths[i-1]; 3242 } 3243 a->nz = 0; 3244 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3245 3246 /* read in nonzero values */ 3247 ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr); 3248 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3249 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3250 3251 /* set "a" and "j" values into matrix */ 3252 nzcount = 0; jcount = 0; 3253 for (i=0; i<mbs; i++) { 3254 nzcountb = nzcount; 3255 nmask = 0; 3256 for (j=0; j<bs; j++) { 3257 kmax = rowlengths[i*bs+j]; 3258 for (k=0; k<kmax; k++) { 3259 tmp = jj[nzcount++]/bs; 3260 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3261 } 3262 } 3263 /* sort the masked values */ 3264 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3265 3266 /* set "j" values into matrix */ 3267 maskcount = 1; 3268 for (j=0; j<nmask; j++) { 3269 a->j[jcount++] = masked[j]; 3270 mask[masked[j]] = maskcount++; 3271 } 3272 /* set "a" values into matrix */ 3273 ishift = bs2*a->i[i]; 3274 for (j=0; j<bs; j++) { 3275 kmax = rowlengths[i*bs+j]; 3276 for (k=0; k<kmax; k++) { 3277 tmp = jj[nzcountb]/bs; 3278 block = mask[tmp] - 1; 3279 point = jj[nzcountb] - bs*tmp; 3280 idx = ishift + bs2*block + j + bs*point; 3281 a->a[idx] = (MatScalar)aa[nzcountb++]; 3282 } 3283 } 3284 /* zero out the mask elements we set */ 3285 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3286 } 3287 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3288 3289 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3290 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3291 ierr = PetscFree(aa);CHKERRQ(ierr); 3292 ierr = PetscFree(jj);CHKERRQ(ierr); 3293 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3294 3295 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3296 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3297 PetscFunctionReturn(0); 3298 } 3299 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "MatCreateSeqBAIJ" 3302 /*@C 3303 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3304 compressed row) format. For good matrix assembly performance the 3305 user should preallocate the matrix storage by setting the parameter nz 3306 (or the array nnz). By setting these parameters accurately, performance 3307 during matrix assembly can be increased by more than a factor of 50. 3308 3309 Collective on MPI_Comm 3310 3311 Input Parameters: 3312 + comm - MPI communicator, set to PETSC_COMM_SELF 3313 . bs - size of block 3314 . m - number of rows 3315 . n - number of columns 3316 . nz - number of nonzero blocks per block row (same for all rows) 3317 - nnz - array containing the number of nonzero blocks in the various block rows 3318 (possibly different for each block row) or NULL 3319 3320 Output Parameter: 3321 . A - the matrix 3322 3323 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3324 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3325 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3326 3327 Options Database Keys: 3328 . -mat_no_unroll - uses code that does not unroll the loops in the 3329 block calculations (much slower) 3330 . -mat_block_size - size of the blocks to use 3331 3332 Level: intermediate 3333 3334 Notes: 3335 The number of rows and columns must be divisible by blocksize. 3336 3337 If the nnz parameter is given then the nz parameter is ignored 3338 3339 A nonzero block is any block that as 1 or more nonzeros in it 3340 3341 The block AIJ format is fully compatible with standard Fortran 77 3342 storage. That is, the stored row and column indices can begin at 3343 either one (as in Fortran) or zero. See the users' manual for details. 3344 3345 Specify the preallocated storage with either nz or nnz (not both). 3346 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3347 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3348 matrices. 3349 3350 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3351 @*/ 3352 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3353 { 3354 PetscErrorCode ierr; 3355 3356 PetscFunctionBegin; 3357 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3358 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3359 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3360 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3361 PetscFunctionReturn(0); 3362 } 3363 3364 #undef __FUNCT__ 3365 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3366 /*@C 3367 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3368 per row in the matrix. For good matrix assembly performance the 3369 user should preallocate the matrix storage by setting the parameter nz 3370 (or the array nnz). By setting these parameters accurately, performance 3371 during matrix assembly can be increased by more than a factor of 50. 3372 3373 Collective on MPI_Comm 3374 3375 Input Parameters: 3376 + A - the matrix 3377 . bs - size of block 3378 . nz - number of block nonzeros per block row (same for all rows) 3379 - nnz - array containing the number of block nonzeros in the various block rows 3380 (possibly different for each block row) or NULL 3381 3382 Options Database Keys: 3383 . -mat_no_unroll - uses code that does not unroll the loops in the 3384 block calculations (much slower) 3385 . -mat_block_size - size of the blocks to use 3386 3387 Level: intermediate 3388 3389 Notes: 3390 If the nnz parameter is given then the nz parameter is ignored 3391 3392 You can call MatGetInfo() to get information on how effective the preallocation was; 3393 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3394 You can also run with the option -info and look for messages with the string 3395 malloc in them to see if additional memory allocation was needed. 3396 3397 The block AIJ format is fully compatible with standard Fortran 77 3398 storage. That is, the stored row and column indices can begin at 3399 either one (as in Fortran) or zero. See the users' manual for details. 3400 3401 Specify the preallocated storage with either nz or nnz (not both). 3402 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3403 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3404 3405 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3406 @*/ 3407 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3408 { 3409 PetscErrorCode ierr; 3410 3411 PetscFunctionBegin; 3412 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3413 PetscValidType(B,1); 3414 PetscValidLogicalCollectiveInt(B,bs,2); 3415 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 3419 #undef __FUNCT__ 3420 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3421 /*@C 3422 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3423 (the default sequential PETSc format). 3424 3425 Collective on MPI_Comm 3426 3427 Input Parameters: 3428 + A - the matrix 3429 . i - the indices into j for the start of each local row (starts with zero) 3430 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3431 - v - optional values in the matrix 3432 3433 Level: developer 3434 3435 Notes: 3436 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3437 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3438 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3439 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3440 block column and the second index is over columns within a block. 3441 3442 .keywords: matrix, aij, compressed row, sparse 3443 3444 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3445 @*/ 3446 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3447 { 3448 PetscErrorCode ierr; 3449 3450 PetscFunctionBegin; 3451 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3452 PetscValidType(B,1); 3453 PetscValidLogicalCollectiveInt(B,bs,2); 3454 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3455 PetscFunctionReturn(0); 3456 } 3457 3458 3459 #undef __FUNCT__ 3460 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3461 /*@ 3462 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3463 3464 Collective on MPI_Comm 3465 3466 Input Parameters: 3467 + comm - must be an MPI communicator of size 1 3468 . bs - size of block 3469 . m - number of rows 3470 . n - number of columns 3471 . i - row indices 3472 . j - column indices 3473 - a - matrix values 3474 3475 Output Parameter: 3476 . mat - the matrix 3477 3478 Level: advanced 3479 3480 Notes: 3481 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3482 once the matrix is destroyed 3483 3484 You cannot set new nonzero locations into this matrix, that will generate an error. 3485 3486 The i and j indices are 0 based 3487 3488 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3489 3490 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3491 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3492 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3493 with column-major ordering within blocks. 3494 3495 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3496 3497 @*/ 3498 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3499 { 3500 PetscErrorCode ierr; 3501 PetscInt ii; 3502 Mat_SeqBAIJ *baij; 3503 3504 PetscFunctionBegin; 3505 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3506 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3507 3508 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3509 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3510 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3511 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3512 baij = (Mat_SeqBAIJ*)(*mat)->data; 3513 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3514 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3515 3516 baij->i = i; 3517 baij->j = j; 3518 baij->a = a; 3519 3520 baij->singlemalloc = PETSC_FALSE; 3521 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3522 baij->free_a = PETSC_FALSE; 3523 baij->free_ij = PETSC_FALSE; 3524 3525 for (ii=0; ii<m; ii++) { 3526 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3527 #if defined(PETSC_USE_DEBUG) 3528 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3529 #endif 3530 } 3531 #if defined(PETSC_USE_DEBUG) 3532 for (ii=0; ii<baij->i[m]; ii++) { 3533 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3534 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3535 } 3536 #endif 3537 3538 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3539 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3540 PetscFunctionReturn(0); 3541 } 3542