1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr); 156 } 157 work = a->mult_work; 158 t = work + k+1; 159 if (!a->sor_work) { 160 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 161 } 162 w = a->sor_work; 163 164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 165 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 166 167 if (flag & SOR_ZERO_INITIAL_GUESS) { 168 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 169 switch (bs) { 170 case 1: 171 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 172 t[0] = b[0]; 173 i2 = 1; 174 idiag += 1; 175 for (i=1; i<m; i++) { 176 v = aa + ai[i]; 177 vi = aj + ai[i]; 178 nz = diag[i] - ai[i]; 179 s[0] = b[i2]; 180 for (j=0; j<nz; j++) { 181 xw[0] = x[vi[j]]; 182 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 183 } 184 t[i2] = s[0]; 185 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 186 x[i2] = xw[0]; 187 idiag += 1; 188 i2 += 1; 189 } 190 break; 191 case 2: 192 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 193 t[0] = b[0]; t[1] = b[1]; 194 i2 = 2; 195 idiag += 4; 196 for (i=1; i<m; i++) { 197 v = aa + 4*ai[i]; 198 vi = aj + ai[i]; 199 nz = diag[i] - ai[i]; 200 s[0] = b[i2]; s[1] = b[i2+1]; 201 for (j=0; j<nz; j++) { 202 idx = 2*vi[j]; 203 it = 4*j; 204 xw[0] = x[idx]; xw[1] = x[1+idx]; 205 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 206 } 207 t[i2] = s[0]; t[i2+1] = s[1]; 208 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 209 x[i2] = xw[0]; x[i2+1] = xw[1]; 210 idiag += 4; 211 i2 += 2; 212 } 213 break; 214 case 3: 215 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 216 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 217 i2 = 3; 218 idiag += 9; 219 for (i=1; i<m; i++) { 220 v = aa + 9*ai[i]; 221 vi = aj + ai[i]; 222 nz = diag[i] - ai[i]; 223 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 224 while (nz--) { 225 idx = 3*(*vi++); 226 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 228 v += 9; 229 } 230 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 231 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 232 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 233 idiag += 9; 234 i2 += 3; 235 } 236 break; 237 case 4: 238 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 239 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 240 i2 = 4; 241 idiag += 16; 242 for (i=1; i<m; i++) { 243 v = aa + 16*ai[i]; 244 vi = aj + ai[i]; 245 nz = diag[i] - ai[i]; 246 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 247 while (nz--) { 248 idx = 4*(*vi++); 249 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 250 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 251 v += 16; 252 } 253 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 254 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 255 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 256 idiag += 16; 257 i2 += 4; 258 } 259 break; 260 case 5: 261 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 262 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 263 i2 = 5; 264 idiag += 25; 265 for (i=1; i<m; i++) { 266 v = aa + 25*ai[i]; 267 vi = aj + ai[i]; 268 nz = diag[i] - ai[i]; 269 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 270 while (nz--) { 271 idx = 5*(*vi++); 272 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 273 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 274 v += 25; 275 } 276 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 277 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 278 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 279 idiag += 25; 280 i2 += 5; 281 } 282 break; 283 case 6: 284 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 285 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 286 i2 = 6; 287 idiag += 36; 288 for (i=1; i<m; i++) { 289 v = aa + 36*ai[i]; 290 vi = aj + ai[i]; 291 nz = diag[i] - ai[i]; 292 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 293 while (nz--) { 294 idx = 6*(*vi++); 295 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 296 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 298 v += 36; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 301 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 302 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 303 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 304 idiag += 36; 305 i2 += 6; 306 } 307 break; 308 case 7: 309 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 310 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 311 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 312 i2 = 7; 313 idiag += 49; 314 for (i=1; i<m; i++) { 315 v = aa + 49*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 319 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 320 while (nz--) { 321 idx = 7*(*vi++); 322 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 323 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 324 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 325 v += 49; 326 } 327 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 328 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 329 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 331 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 332 idiag += 49; 333 i2 += 7; 334 } 335 break; 336 default: 337 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 338 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 339 i2 = bs; 340 idiag += bs2; 341 for (i=1; i<m; i++) { 342 v = aa + bs2*ai[i]; 343 vi = aj + ai[i]; 344 nz = diag[i] - ai[i]; 345 346 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 347 /* copy all rows of x that are needed into contiguous space */ 348 workt = work; 349 for (j=0; j<nz; j++) { 350 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 351 workt += bs; 352 } 353 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 354 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 356 357 idiag += bs2; 358 i2 += bs; 359 } 360 break; 361 } 362 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 363 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 364 xb = t; 365 } 366 else xb = b; 367 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 368 idiag = a->idiag+bs2*(a->mbs-1); 369 i2 = bs * (m-1); 370 switch (bs) { 371 case 1: 372 s[0] = xb[i2]; 373 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 374 x[i2] = xw[0]; 375 i2 -= 1; 376 for (i=m-2; i>=0; i--) { 377 v = aa + (diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s[0] = xb[i2]; 381 for (j=0; j<nz; j++) { 382 xw[0] = x[vi[j]]; 383 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 384 } 385 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 386 x[i2] = xw[0]; 387 idiag -= 1; 388 i2 -= 1; 389 } 390 break; 391 case 2: 392 s[0] = xb[i2]; s[1] = xb[i2+1]; 393 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 394 x[i2] = xw[0]; x[i2+1] = xw[1]; 395 i2 -= 2; 396 idiag -= 4; 397 for (i=m-2; i>=0; i--) { 398 v = aa + 4*(diag[i] + 1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 for (j=0; j<nz; j++) { 403 idx = 2*vi[j]; 404 it = 4*j; 405 xw[0] = x[idx]; xw[1] = x[1+idx]; 406 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 407 } 408 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 409 x[i2] = xw[0]; x[i2+1] = xw[1]; 410 idiag -= 4; 411 i2 -= 2; 412 } 413 break; 414 case 3: 415 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 416 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 417 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 418 i2 -= 3; 419 idiag -= 9; 420 for (i=m-2; i>=0; i--) { 421 v = aa + 9*(diag[i]+1); 422 vi = aj + diag[i] + 1; 423 nz = ai[i+1] - diag[i] - 1; 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 while (nz--) { 426 idx = 3*(*vi++); 427 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 429 v += 9; 430 } 431 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 432 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 433 idiag -= 9; 434 i2 -= 3; 435 } 436 break; 437 case 4: 438 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 439 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 440 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 441 i2 -= 4; 442 idiag -= 16; 443 for (i=m-2; i>=0; i--) { 444 v = aa + 16*(diag[i]+1); 445 vi = aj + diag[i] + 1; 446 nz = ai[i+1] - diag[i] - 1; 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 while (nz--) { 449 idx = 4*(*vi++); 450 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 451 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 452 v += 16; 453 } 454 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 455 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 456 idiag -= 16; 457 i2 -= 4; 458 } 459 break; 460 case 5: 461 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 462 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 463 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 464 i2 -= 5; 465 idiag -= 25; 466 for (i=m-2; i>=0; i--) { 467 v = aa + 25*(diag[i]+1); 468 vi = aj + diag[i] + 1; 469 nz = ai[i+1] - diag[i] - 1; 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 while (nz--) { 472 idx = 5*(*vi++); 473 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 474 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 475 v += 25; 476 } 477 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 478 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 479 idiag -= 25; 480 i2 -= 5; 481 } 482 break; 483 case 6: 484 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 485 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 486 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 487 i2 -= 6; 488 idiag -= 36; 489 for (i=m-2; i>=0; i--) { 490 v = aa + 36*(diag[i]+1); 491 vi = aj + diag[i] + 1; 492 nz = ai[i+1] - diag[i] - 1; 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 while (nz--) { 495 idx = 6*(*vi++); 496 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 497 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 499 v += 36; 500 } 501 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 503 idiag -= 36; 504 i2 -= 6; 505 } 506 break; 507 case 7: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 509 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 510 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 512 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 513 i2 -= 7; 514 idiag -= 49; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 49*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 520 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 521 while (nz--) { 522 idx = 7*(*vi++); 523 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 524 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 525 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 526 v += 49; 527 } 528 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 529 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 530 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 531 idiag -= 49; 532 i2 -= 7; 533 } 534 break; 535 default: 536 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 537 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 538 i2 -= bs; 539 idiag -= bs2; 540 for (i=m-2; i>=0; i--) { 541 v = aa + bs2*(diag[i]+1); 542 vi = aj + diag[i] + 1; 543 nz = ai[i+1] - diag[i] - 1; 544 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 /* copy all rows of x that are needed into contiguous space */ 547 workt = work; 548 for (j=0; j<nz; j++) { 549 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 workt += bs; 551 } 552 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 553 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 554 555 idiag -= bs2; 556 i2 -= bs; 557 } 558 break; 559 } 560 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 561 } 562 its--; 563 } 564 while (its--) { 565 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 566 idiag = a->idiag; 567 i2 = 0; 568 switch (bs) { 569 case 1: 570 for (i=0; i<m; i++) { 571 v = aa + ai[i]; 572 vi = aj + ai[i]; 573 nz = ai[i+1] - ai[i]; 574 s[0] = b[i2]; 575 for (j=0; j<nz; j++) { 576 xw[0] = x[vi[j]]; 577 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 578 } 579 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 580 x[i2] += xw[0]; 581 idiag += 1; 582 i2 += 1; 583 } 584 break; 585 case 2: 586 for (i=0; i<m; i++) { 587 v = aa + 4*ai[i]; 588 vi = aj + ai[i]; 589 nz = ai[i+1] - ai[i]; 590 s[0] = b[i2]; s[1] = b[i2+1]; 591 for (j=0; j<nz; j++) { 592 idx = 2*vi[j]; 593 it = 4*j; 594 xw[0] = x[idx]; xw[1] = x[1+idx]; 595 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 596 } 597 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 598 x[i2] += xw[0]; x[i2+1] += xw[1]; 599 idiag += 4; 600 i2 += 2; 601 } 602 break; 603 case 3: 604 for (i=0; i<m; i++) { 605 v = aa + 9*ai[i]; 606 vi = aj + ai[i]; 607 nz = ai[i+1] - ai[i]; 608 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 609 while (nz--) { 610 idx = 3*(*vi++); 611 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 612 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 613 v += 9; 614 } 615 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 616 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 617 idiag += 9; 618 i2 += 3; 619 } 620 break; 621 case 4: 622 for (i=0; i<m; i++) { 623 v = aa + 16*ai[i]; 624 vi = aj + ai[i]; 625 nz = ai[i+1] - ai[i]; 626 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 627 while (nz--) { 628 idx = 4*(*vi++); 629 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 630 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 631 v += 16; 632 } 633 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 634 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 635 idiag += 16; 636 i2 += 4; 637 } 638 break; 639 case 5: 640 for (i=0; i<m; i++) { 641 v = aa + 25*ai[i]; 642 vi = aj + ai[i]; 643 nz = ai[i+1] - ai[i]; 644 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 645 while (nz--) { 646 idx = 5*(*vi++); 647 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 648 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 649 v += 25; 650 } 651 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 652 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 653 idiag += 25; 654 i2 += 5; 655 } 656 break; 657 case 6: 658 for (i=0; i<m; i++) { 659 v = aa + 36*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 663 while (nz--) { 664 idx = 6*(*vi++); 665 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 666 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 668 v += 36; 669 } 670 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 673 idiag += 36; 674 i2 += 6; 675 } 676 break; 677 case 7: 678 for (i=0; i<m; i++) { 679 v = aa + 49*ai[i]; 680 vi = aj + ai[i]; 681 nz = ai[i+1] - ai[i]; 682 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 683 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 684 while (nz--) { 685 idx = 7*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 689 v += 49; 690 } 691 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 692 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 693 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 694 idiag += 49; 695 i2 += 7; 696 } 697 break; 698 default: 699 for (i=0; i<m; i++) { 700 v = aa + bs2*ai[i]; 701 vi = aj + ai[i]; 702 nz = ai[i+1] - ai[i]; 703 704 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 705 /* copy all rows of x that are needed into contiguous space */ 706 workt = work; 707 for (j=0; j<nz; j++) { 708 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 709 workt += bs; 710 } 711 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 712 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 713 714 idiag += bs2; 715 i2 += bs; 716 } 717 break; 718 } 719 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 720 } 721 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 722 idiag = a->idiag+bs2*(a->mbs-1); 723 i2 = bs * (m-1); 724 switch (bs) { 725 case 1: 726 for (i=m-1; i>=0; i--) { 727 v = aa + ai[i]; 728 vi = aj + ai[i]; 729 nz = ai[i+1] - ai[i]; 730 s[0] = b[i2]; 731 for (j=0; j<nz; j++) { 732 xw[0] = x[vi[j]]; 733 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 734 } 735 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 736 x[i2] += xw[0]; 737 idiag -= 1; 738 i2 -= 1; 739 } 740 break; 741 case 2: 742 for (i=m-1; i>=0; i--) { 743 v = aa + 4*ai[i]; 744 vi = aj + ai[i]; 745 nz = ai[i+1] - ai[i]; 746 s[0] = b[i2]; s[1] = b[i2+1]; 747 for (j=0; j<nz; j++) { 748 idx = 2*vi[j]; 749 it = 4*j; 750 xw[0] = x[idx]; xw[1] = x[1+idx]; 751 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 752 } 753 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 754 x[i2] += xw[0]; x[i2+1] += xw[1]; 755 idiag -= 4; 756 i2 -= 2; 757 } 758 break; 759 case 3: 760 for (i=m-1; i>=0; i--) { 761 v = aa + 9*ai[i]; 762 vi = aj + ai[i]; 763 nz = ai[i+1] - ai[i]; 764 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 765 while (nz--) { 766 idx = 3*(*vi++); 767 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 768 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 769 v += 9; 770 } 771 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 772 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 773 idiag -= 9; 774 i2 -= 3; 775 } 776 break; 777 case 4: 778 for (i=m-1; i>=0; i--) { 779 v = aa + 16*ai[i]; 780 vi = aj + ai[i]; 781 nz = ai[i+1] - ai[i]; 782 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 783 while (nz--) { 784 idx = 4*(*vi++); 785 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 786 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 787 v += 16; 788 } 789 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 790 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 791 idiag -= 16; 792 i2 -= 4; 793 } 794 break; 795 case 5: 796 for (i=m-1; i>=0; i--) { 797 v = aa + 25*ai[i]; 798 vi = aj + ai[i]; 799 nz = ai[i+1] - ai[i]; 800 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 801 while (nz--) { 802 idx = 5*(*vi++); 803 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 804 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 805 v += 25; 806 } 807 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 808 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 809 idiag -= 25; 810 i2 -= 5; 811 } 812 break; 813 case 6: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 36*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 819 while (nz--) { 820 idx = 6*(*vi++); 821 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 822 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 824 v += 36; 825 } 826 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 829 idiag -= 36; 830 i2 -= 6; 831 } 832 break; 833 case 7: 834 for (i=m-1; i>=0; i--) { 835 v = aa + 49*ai[i]; 836 vi = aj + ai[i]; 837 nz = ai[i+1] - ai[i]; 838 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 839 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 844 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 845 v += 49; 846 } 847 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 848 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 849 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 850 idiag -= 49; 851 i2 -= 7; 852 } 853 break; 854 default: 855 for (i=m-1; i>=0; i--) { 856 v = aa + bs2*ai[i]; 857 vi = aj + ai[i]; 858 nz = ai[i+1] - ai[i]; 859 860 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 861 /* copy all rows of x that are needed into contiguous space */ 862 workt = work; 863 for (j=0; j<nz; j++) { 864 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 workt += bs; 866 } 867 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 868 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 869 870 idiag -= bs2; 871 i2 -= bs; 872 } 873 break; 874 } 875 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 876 } 877 } 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 884 /* 885 Special version for direct calls from Fortran (Used in PETSc-fun3d) 886 */ 887 #if defined(PETSC_HAVE_FORTRAN_CAPS) 888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 890 #define matsetvaluesblocked4_ matsetvaluesblocked4 891 #endif 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "matsetvaluesblocked4_" 895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 896 { 897 Mat A = *AA; 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 899 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 900 PetscInt *ai =a->i,*ailen=a->ilen; 901 PetscInt *aj =a->j,stepval,lastcol = -1; 902 const PetscScalar *value = v; 903 MatScalar *ap,*aa = a->a,*bap; 904 905 PetscFunctionBegin; 906 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 907 stepval = (n-1)*4; 908 for (k=0; k<m; k++) { /* loop over added rows */ 909 row = im[k]; 910 rp = aj + ai[row]; 911 ap = aa + 16*ai[row]; 912 nrow = ailen[row]; 913 low = 0; 914 high = nrow; 915 for (l=0; l<n; l++) { /* loop over added columns */ 916 col = in[l]; 917 if (col <= lastcol) low = 0; 918 else high = nrow; 919 lastcol = col; 920 value = v + k*(stepval+4 + l)*4; 921 while (high-low > 7) { 922 t = (low+high)/2; 923 if (rp[t] > col) high = t; 924 else low = t; 925 } 926 for (i=low; i<high; i++) { 927 if (rp[i] > col) break; 928 if (rp[i] == col) { 929 bap = ap + 16*i; 930 for (ii=0; ii<4; ii++,value+=stepval) { 931 for (jj=ii; jj<16; jj+=4) { 932 bap[jj] += *value++; 933 } 934 } 935 goto noinsert2; 936 } 937 } 938 N = nrow++ - 1; 939 high++; /* added new column index thus must search to one higher than before */ 940 /* shift up all the later entries in this row */ 941 for (ii=N; ii>=i; ii--) { 942 rp[ii+1] = rp[ii]; 943 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 944 } 945 if (N >= i) { 946 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 947 } 948 rp[i] = col; 949 bap = ap + 16*i; 950 for (ii=0; ii<4; ii++,value+=stepval) { 951 for (jj=ii; jj<16; jj+=4) { 952 bap[jj] = *value++; 953 } 954 } 955 noinsert2:; 956 low = i; 957 } 958 ailen[row] = nrow; 959 } 960 PetscFunctionReturnVoid(); 961 } 962 963 #if defined(PETSC_HAVE_FORTRAN_CAPS) 964 #define matsetvalues4_ MATSETVALUES4 965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 966 #define matsetvalues4_ matsetvalues4 967 #endif 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSetValues4_" 971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 972 { 973 Mat A = *AA; 974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 975 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 976 PetscInt *ai=a->i,*ailen=a->ilen; 977 PetscInt *aj=a->j,brow,bcol; 978 PetscInt ridx,cidx,lastcol = -1; 979 MatScalar *ap,value,*aa=a->a,*bap; 980 981 PetscFunctionBegin; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; brow = row/4; 984 rp = aj + ai[brow]; 985 ap = aa + 16*ai[brow]; 986 nrow = ailen[brow]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; bcol = col/4; 991 ridx = row % 4; cidx = col % 4; 992 value = v[l + k*n]; 993 if (col <= lastcol) low = 0; 994 else high = nrow; 995 lastcol = col; 996 while (high-low > 7) { 997 t = (low+high)/2; 998 if (rp[t] > bcol) high = t; 999 else low = t; 1000 } 1001 for (i=low; i<high; i++) { 1002 if (rp[i] > bcol) break; 1003 if (rp[i] == bcol) { 1004 bap = ap + 16*i + 4*cidx + ridx; 1005 *bap += value; 1006 goto noinsert1; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N>=i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = bcol; 1020 ap[16*i + 4*cidx + ridx] = value; 1021 noinsert1:; 1022 low = i; 1023 } 1024 ailen[brow] = nrow; 1025 } 1026 PetscFunctionReturnVoid(); 1027 } 1028 1029 /* 1030 Checks for missing diagonals 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*ii = a->i,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !ii) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1047 } else { 1048 diag = a->diag; 1049 for (i=0; i<a->mbs; i++) { 1050 if (diag[i] >= ii[i+1]) { 1051 *missing = PETSC_TRUE; 1052 if (d) *d = i; 1053 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1054 break; 1055 } 1056 } 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = 0; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1176 { 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (!ia) PetscFunctionReturn(0); 1181 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1182 ierr = PetscFree(*ia);CHKERRQ(ierr); 1183 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1191 { 1192 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1198 #endif 1199 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1200 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1201 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1202 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1203 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1204 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1205 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1206 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1207 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1208 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1209 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1210 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1211 1212 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1213 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1214 ierr = PetscFree(A->data);CHKERRQ(ierr); 1215 1216 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1233 { 1234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 switch (op) { 1239 case MAT_ROW_ORIENTED: 1240 a->roworiented = flg; 1241 break; 1242 case MAT_KEEP_NONZERO_PATTERN: 1243 a->keepnonzeropattern = flg; 1244 break; 1245 case MAT_NEW_NONZERO_LOCATIONS: 1246 a->nonew = (flg ? 0 : 1); 1247 break; 1248 case MAT_NEW_NONZERO_LOCATION_ERR: 1249 a->nonew = (flg ? -1 : 0); 1250 break; 1251 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1252 a->nonew = (flg ? -2 : 0); 1253 break; 1254 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1255 a->nounused = (flg ? -1 : 0); 1256 break; 1257 case MAT_NEW_DIAGONALS: 1258 case MAT_IGNORE_OFF_PROC_ENTRIES: 1259 case MAT_USE_HASH_TABLE: 1260 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1261 break; 1262 case MAT_SPD: 1263 case MAT_SYMMETRIC: 1264 case MAT_STRUCTURALLY_SYMMETRIC: 1265 case MAT_HERMITIAN: 1266 case MAT_SYMMETRY_ETERNAL: 1267 /* These options are handled directly by MatSetOption() */ 1268 break; 1269 default: 1270 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1271 } 1272 PetscFunctionReturn(0); 1273 } 1274 1275 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1278 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1279 { 1280 PetscErrorCode ierr; 1281 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1282 MatScalar *aa_i; 1283 PetscScalar *v_i; 1284 1285 PetscFunctionBegin; 1286 bs = A->rmap->bs; 1287 bs2 = bs*bs; 1288 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1289 1290 bn = row/bs; /* Block number */ 1291 bp = row % bs; /* Block Position */ 1292 M = ai[bn+1] - ai[bn]; 1293 *nz = bs*M; 1294 1295 if (v) { 1296 *v = 0; 1297 if (*nz) { 1298 ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr); 1299 for (i=0; i<M; i++) { /* for each block in the block row */ 1300 v_i = *v + i*bs; 1301 aa_i = aa + bs2*(ai[bn] + i); 1302 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1303 } 1304 } 1305 } 1306 1307 if (idx) { 1308 *idx = 0; 1309 if (*nz) { 1310 ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr); 1311 for (i=0; i<M; i++) { /* for each block in the block row */ 1312 idx_i = *idx + i*bs; 1313 itmp = bs*aj[ai[bn] + i]; 1314 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1315 } 1316 } 1317 } 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1323 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1324 { 1325 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 1328 PetscFunctionBegin; 1329 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1335 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1336 { 1337 PetscErrorCode ierr; 1338 1339 PetscFunctionBegin; 1340 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1341 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1342 PetscFunctionReturn(0); 1343 } 1344 1345 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1346 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1349 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1350 { 1351 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1352 Mat C; 1353 PetscErrorCode ierr; 1354 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1355 PetscInt *rows,*cols,bs2=a->bs2; 1356 MatScalar *array; 1357 1358 PetscFunctionBegin; 1359 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1360 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1361 ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr); 1362 1363 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1364 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1365 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1366 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1367 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1368 ierr = PetscFree(col);CHKERRQ(ierr); 1369 } else { 1370 C = *B; 1371 } 1372 1373 array = a->a; 1374 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1375 for (i=0; i<mbs; i++) { 1376 cols[0] = i*bs; 1377 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1378 len = ai[i+1] - ai[i]; 1379 for (j=0; j<len; j++) { 1380 rows[0] = (*aj++)*bs; 1381 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1382 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1383 array += bs2; 1384 } 1385 } 1386 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1387 1388 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 1391 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1392 *B = C; 1393 } else { 1394 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1401 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1402 { 1403 PetscErrorCode ierr; 1404 Mat Btrans; 1405 1406 PetscFunctionBegin; 1407 *f = PETSC_FALSE; 1408 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1409 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1410 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1419 PetscErrorCode ierr; 1420 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1421 int fd; 1422 PetscScalar *aa; 1423 FILE *file; 1424 1425 PetscFunctionBegin; 1426 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1427 ierr = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr); 1428 col_lens[0] = MAT_FILE_CLASSID; 1429 1430 col_lens[1] = A->rmap->N; 1431 col_lens[2] = A->cmap->n; 1432 col_lens[3] = a->nz*bs2; 1433 1434 /* store lengths of each row and write (including header) to file */ 1435 count = 0; 1436 for (i=0; i<a->mbs; i++) { 1437 for (j=0; j<bs; j++) { 1438 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1439 } 1440 } 1441 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1442 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1443 1444 /* store column indices (zero start index) */ 1445 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1446 count = 0; 1447 for (i=0; i<a->mbs; i++) { 1448 for (j=0; j<bs; j++) { 1449 for (k=a->i[i]; k<a->i[i+1]; k++) { 1450 for (l=0; l<bs; l++) { 1451 jj[count++] = bs*a->j[k] + l; 1452 } 1453 } 1454 } 1455 } 1456 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1457 ierr = PetscFree(jj);CHKERRQ(ierr); 1458 1459 /* store nonzero values */ 1460 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1461 count = 0; 1462 for (i=0; i<a->mbs; i++) { 1463 for (j=0; j<bs; j++) { 1464 for (k=a->i[i]; k<a->i[i+1]; k++) { 1465 for (l=0; l<bs; l++) { 1466 aa[count++] = a->a[bs2*k + l*bs + j]; 1467 } 1468 } 1469 } 1470 } 1471 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1472 ierr = PetscFree(aa);CHKERRQ(ierr); 1473 1474 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1475 if (file) { 1476 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1477 } 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1483 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1484 { 1485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1486 PetscErrorCode ierr; 1487 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1488 PetscViewerFormat format; 1489 1490 PetscFunctionBegin; 1491 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1492 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1493 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1494 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1495 Mat aij; 1496 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1497 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1498 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1499 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1500 PetscFunctionReturn(0); 1501 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1502 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1503 for (i=0; i<a->mbs; i++) { 1504 for (j=0; j<bs; j++) { 1505 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1506 for (k=a->i[i]; k<a->i[i+1]; k++) { 1507 for (l=0; l<bs; l++) { 1508 #if defined(PETSC_USE_COMPLEX) 1509 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1510 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1511 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1512 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1513 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1514 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1515 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1516 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1517 } 1518 #else 1519 if (a->a[bs2*k + l*bs + j] != 0.0) { 1520 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1521 } 1522 #endif 1523 } 1524 } 1525 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1526 } 1527 } 1528 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1529 } else { 1530 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1531 for (i=0; i<a->mbs; i++) { 1532 for (j=0; j<bs; j++) { 1533 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1534 for (k=a->i[i]; k<a->i[i+1]; k++) { 1535 for (l=0; l<bs; l++) { 1536 #if defined(PETSC_USE_COMPLEX) 1537 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1539 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1542 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } else { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } 1546 #else 1547 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1548 #endif 1549 } 1550 } 1551 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1552 } 1553 } 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1555 } 1556 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #include <petscdraw.h> 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1563 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1564 { 1565 Mat A = (Mat) Aa; 1566 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1567 PetscErrorCode ierr; 1568 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1569 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1570 MatScalar *aa; 1571 PetscViewer viewer; 1572 PetscViewerFormat format; 1573 1574 PetscFunctionBegin; 1575 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1576 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1577 1578 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1579 1580 /* loop over matrix elements drawing boxes */ 1581 1582 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1583 color = PETSC_DRAW_BLUE; 1584 for (i=0,row=0; i<mbs; i++,row+=bs) { 1585 for (j=a->i[i]; j<a->i[i+1]; j++) { 1586 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1587 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1588 aa = a->a + j*bs2; 1589 for (k=0; k<bs; k++) { 1590 for (l=0; l<bs; l++) { 1591 if (PetscRealPart(*aa++) >= 0.) continue; 1592 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1593 } 1594 } 1595 } 1596 } 1597 color = PETSC_DRAW_CYAN; 1598 for (i=0,row=0; i<mbs; i++,row+=bs) { 1599 for (j=a->i[i]; j<a->i[i+1]; j++) { 1600 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1601 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1602 aa = a->a + j*bs2; 1603 for (k=0; k<bs; k++) { 1604 for (l=0; l<bs; l++) { 1605 if (PetscRealPart(*aa++) != 0.) continue; 1606 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1607 } 1608 } 1609 } 1610 } 1611 color = PETSC_DRAW_RED; 1612 for (i=0,row=0; i<mbs; i++,row+=bs) { 1613 for (j=a->i[i]; j<a->i[i+1]; j++) { 1614 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1615 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1616 aa = a->a + j*bs2; 1617 for (k=0; k<bs; k++) { 1618 for (l=0; l<bs; l++) { 1619 if (PetscRealPart(*aa++) <= 0.) continue; 1620 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1621 } 1622 } 1623 } 1624 } 1625 } else { 1626 /* use contour shading to indicate magnitude of values */ 1627 /* first determine max of all nonzero values */ 1628 PetscDraw popup; 1629 PetscReal scale,maxv = 0.0; 1630 1631 for (i=0; i<a->nz*a->bs2; i++) { 1632 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1633 } 1634 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1635 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1636 if (popup) { 1637 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1638 } 1639 for (i=0,row=0; i<mbs; i++,row+=bs) { 1640 for (j=a->i[i]; j<a->i[i+1]; j++) { 1641 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1642 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1643 aa = a->a + j*bs2; 1644 for (k=0; k<bs; k++) { 1645 for (l=0; l<bs; l++) { 1646 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1647 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1648 } 1649 } 1650 } 1651 } 1652 } 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1658 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1659 { 1660 PetscErrorCode ierr; 1661 PetscReal xl,yl,xr,yr,w,h; 1662 PetscDraw draw; 1663 PetscBool isnull; 1664 1665 PetscFunctionBegin; 1666 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1667 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1668 1669 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1670 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1671 xr += w; yr += h; xl = -w; yl = -h; 1672 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1673 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1674 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatView_SeqBAIJ" 1680 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1681 { 1682 PetscErrorCode ierr; 1683 PetscBool iascii,isbinary,isdraw; 1684 1685 PetscFunctionBegin; 1686 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1687 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1688 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1689 if (iascii) { 1690 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1691 } else if (isbinary) { 1692 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1693 } else if (isdraw) { 1694 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1695 } else { 1696 Mat B; 1697 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1698 ierr = MatView(B,viewer);CHKERRQ(ierr); 1699 ierr = MatDestroy(&B);CHKERRQ(ierr); 1700 } 1701 PetscFunctionReturn(0); 1702 } 1703 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1707 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1708 { 1709 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1710 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1711 PetscInt *ai = a->i,*ailen = a->ilen; 1712 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1713 MatScalar *ap,*aa = a->a; 1714 1715 PetscFunctionBegin; 1716 for (k=0; k<m; k++) { /* loop over rows */ 1717 row = im[k]; brow = row/bs; 1718 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1719 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1720 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1721 nrow = ailen[brow]; 1722 for (l=0; l<n; l++) { /* loop over columns */ 1723 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1724 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1725 col = in[l]; 1726 bcol = col/bs; 1727 cidx = col%bs; 1728 ridx = row%bs; 1729 high = nrow; 1730 low = 0; /* assume unsorted */ 1731 while (high-low > 5) { 1732 t = (low+high)/2; 1733 if (rp[t] > bcol) high = t; 1734 else low = t; 1735 } 1736 for (i=low; i<high; i++) { 1737 if (rp[i] > bcol) break; 1738 if (rp[i] == bcol) { 1739 *v++ = ap[bs2*i+bs*cidx+ridx]; 1740 goto finished; 1741 } 1742 } 1743 *v++ = 0.0; 1744 finished:; 1745 } 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1752 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1753 { 1754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1755 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1756 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1757 PetscErrorCode ierr; 1758 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1759 PetscBool roworiented=a->roworiented; 1760 const PetscScalar *value = v; 1761 MatScalar *ap,*aa = a->a,*bap; 1762 1763 PetscFunctionBegin; 1764 if (roworiented) { 1765 stepval = (n-1)*bs; 1766 } else { 1767 stepval = (m-1)*bs; 1768 } 1769 for (k=0; k<m; k++) { /* loop over added rows */ 1770 row = im[k]; 1771 if (row < 0) continue; 1772 #if defined(PETSC_USE_DEBUG) 1773 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1774 #endif 1775 rp = aj + ai[row]; 1776 ap = aa + bs2*ai[row]; 1777 rmax = imax[row]; 1778 nrow = ailen[row]; 1779 low = 0; 1780 high = nrow; 1781 for (l=0; l<n; l++) { /* loop over added columns */ 1782 if (in[l] < 0) continue; 1783 #if defined(PETSC_USE_DEBUG) 1784 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1785 #endif 1786 col = in[l]; 1787 if (roworiented) { 1788 value = v + (k*(stepval+bs) + l)*bs; 1789 } else { 1790 value = v + (l*(stepval+bs) + k)*bs; 1791 } 1792 if (col <= lastcol) low = 0; 1793 else high = nrow; 1794 lastcol = col; 1795 while (high-low > 7) { 1796 t = (low+high)/2; 1797 if (rp[t] > col) high = t; 1798 else low = t; 1799 } 1800 for (i=low; i<high; i++) { 1801 if (rp[i] > col) break; 1802 if (rp[i] == col) { 1803 bap = ap + bs2*i; 1804 if (roworiented) { 1805 if (is == ADD_VALUES) { 1806 for (ii=0; ii<bs; ii++,value+=stepval) { 1807 for (jj=ii; jj<bs2; jj+=bs) { 1808 bap[jj] += *value++; 1809 } 1810 } 1811 } else { 1812 for (ii=0; ii<bs; ii++,value+=stepval) { 1813 for (jj=ii; jj<bs2; jj+=bs) { 1814 bap[jj] = *value++; 1815 } 1816 } 1817 } 1818 } else { 1819 if (is == ADD_VALUES) { 1820 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1821 for (jj=0; jj<bs; jj++) { 1822 bap[jj] += value[jj]; 1823 } 1824 bap += bs; 1825 } 1826 } else { 1827 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1828 for (jj=0; jj<bs; jj++) { 1829 bap[jj] = value[jj]; 1830 } 1831 bap += bs; 1832 } 1833 } 1834 } 1835 goto noinsert2; 1836 } 1837 } 1838 if (nonew == 1) goto noinsert2; 1839 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1840 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1841 N = nrow++ - 1; high++; 1842 /* shift up all the later entries in this row */ 1843 for (ii=N; ii>=i; ii--) { 1844 rp[ii+1] = rp[ii]; 1845 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1846 } 1847 if (N >= i) { 1848 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1849 } 1850 rp[i] = col; 1851 bap = ap + bs2*i; 1852 if (roworiented) { 1853 for (ii=0; ii<bs; ii++,value+=stepval) { 1854 for (jj=ii; jj<bs2; jj+=bs) { 1855 bap[jj] = *value++; 1856 } 1857 } 1858 } else { 1859 for (ii=0; ii<bs; ii++,value+=stepval) { 1860 for (jj=0; jj<bs; jj++) { 1861 *bap++ = *value++; 1862 } 1863 } 1864 } 1865 noinsert2:; 1866 low = i; 1867 } 1868 ailen[row] = nrow; 1869 } 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1875 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1876 { 1877 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1878 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1879 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1880 PetscErrorCode ierr; 1881 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1882 MatScalar *aa = a->a,*ap; 1883 PetscReal ratio=0.6; 1884 1885 PetscFunctionBegin; 1886 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1887 1888 if (m) rmax = ailen[0]; 1889 for (i=1; i<mbs; i++) { 1890 /* move each row back by the amount of empty slots (fshift) before it*/ 1891 fshift += imax[i-1] - ailen[i-1]; 1892 rmax = PetscMax(rmax,ailen[i]); 1893 if (fshift) { 1894 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1895 N = ailen[i]; 1896 for (j=0; j<N; j++) { 1897 ip[j-fshift] = ip[j]; 1898 1899 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1900 } 1901 } 1902 ai[i] = ai[i-1] + ailen[i-1]; 1903 } 1904 if (mbs) { 1905 fshift += imax[mbs-1] - ailen[mbs-1]; 1906 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1907 } 1908 1909 /* reset ilen and imax for each row */ 1910 a->nonzerorowcnt = 0; 1911 for (i=0; i<mbs; i++) { 1912 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1913 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1914 } 1915 a->nz = ai[mbs]; 1916 1917 /* diagonals may have moved, so kill the diagonal pointers */ 1918 a->idiagvalid = PETSC_FALSE; 1919 if (fshift && a->diag) { 1920 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1921 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1922 a->diag = 0; 1923 } 1924 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1925 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1926 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1927 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1928 1929 A->info.mallocs += a->reallocs; 1930 a->reallocs = 0; 1931 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1932 1933 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 /* 1938 This function returns an array of flags which indicate the locations of contiguous 1939 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1940 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1941 Assume: sizes should be long enough to hold all the values. 1942 */ 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1945 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1946 { 1947 PetscInt i,j,k,row; 1948 PetscBool flg; 1949 1950 PetscFunctionBegin; 1951 for (i=0,j=0; i<n; j++) { 1952 row = idx[i]; 1953 if (row%bs!=0) { /* Not the begining of a block */ 1954 sizes[j] = 1; 1955 i++; 1956 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1957 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1958 i++; 1959 } else { /* Begining of the block, so check if the complete block exists */ 1960 flg = PETSC_TRUE; 1961 for (k=1; k<bs; k++) { 1962 if (row+k != idx[i+k]) { /* break in the block */ 1963 flg = PETSC_FALSE; 1964 break; 1965 } 1966 } 1967 if (flg) { /* No break in the bs */ 1968 sizes[j] = bs; 1969 i += bs; 1970 } else { 1971 sizes[j] = 1; 1972 i++; 1973 } 1974 } 1975 } 1976 *bs_max = j; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1982 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1983 { 1984 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1985 PetscErrorCode ierr; 1986 PetscInt i,j,k,count,*rows; 1987 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1988 PetscScalar zero = 0.0; 1989 MatScalar *aa; 1990 const PetscScalar *xx; 1991 PetscScalar *bb; 1992 1993 PetscFunctionBegin; 1994 /* fix right hand side if needed */ 1995 if (x && b) { 1996 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1997 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1998 for (i=0; i<is_n; i++) { 1999 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2000 } 2001 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2002 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2003 } 2004 2005 /* Make a copy of the IS and sort it */ 2006 /* allocate memory for rows,sizes */ 2007 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2008 2009 /* copy IS values to rows, and sort them */ 2010 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2011 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2012 2013 if (baij->keepnonzeropattern) { 2014 for (i=0; i<is_n; i++) sizes[i] = 1; 2015 bs_max = is_n; 2016 } else { 2017 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2018 A->nonzerostate++; 2019 } 2020 2021 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2022 row = rows[j]; 2023 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2024 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2025 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2026 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2027 if (diag != (PetscScalar)0.0) { 2028 if (baij->ilen[row/bs] > 0) { 2029 baij->ilen[row/bs] = 1; 2030 baij->j[baij->i[row/bs]] = row/bs; 2031 2032 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2033 } 2034 /* Now insert all the diagonal values for this bs */ 2035 for (k=0; k<bs; k++) { 2036 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2037 } 2038 } else { /* (diag == 0.0) */ 2039 baij->ilen[row/bs] = 0; 2040 } /* end (diag == 0.0) */ 2041 } else { /* (sizes[i] != bs) */ 2042 #if defined(PETSC_USE_DEBUG) 2043 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2044 #endif 2045 for (k=0; k<count; k++) { 2046 aa[0] = zero; 2047 aa += bs; 2048 } 2049 if (diag != (PetscScalar)0.0) { 2050 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 } 2053 } 2054 2055 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2056 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 #undef __FUNCT__ 2061 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2062 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2063 { 2064 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2065 PetscErrorCode ierr; 2066 PetscInt i,j,k,count; 2067 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2068 PetscScalar zero = 0.0; 2069 MatScalar *aa; 2070 const PetscScalar *xx; 2071 PetscScalar *bb; 2072 PetscBool *zeroed,vecs = PETSC_FALSE; 2073 2074 PetscFunctionBegin; 2075 /* fix right hand side if needed */ 2076 if (x && b) { 2077 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2078 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2079 vecs = PETSC_TRUE; 2080 } 2081 2082 /* zero the columns */ 2083 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2084 for (i=0; i<is_n; i++) { 2085 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2086 zeroed[is_idx[i]] = PETSC_TRUE; 2087 } 2088 for (i=0; i<A->rmap->N; i++) { 2089 if (!zeroed[i]) { 2090 row = i/bs; 2091 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2092 for (k=0; k<bs; k++) { 2093 col = bs*baij->j[j] + k; 2094 if (zeroed[col]) { 2095 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2096 if (vecs) bb[i] -= aa[0]*xx[col]; 2097 aa[0] = 0.0; 2098 } 2099 } 2100 } 2101 } else if (vecs) bb[i] = diag*xx[i]; 2102 } 2103 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2104 if (vecs) { 2105 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2106 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2107 } 2108 2109 /* zero the rows */ 2110 for (i=0; i<is_n; i++) { 2111 row = is_idx[i]; 2112 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2113 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2114 for (k=0; k<count; k++) { 2115 aa[0] = zero; 2116 aa += bs; 2117 } 2118 if (diag != (PetscScalar)0.0) { 2119 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2120 } 2121 } 2122 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 #undef __FUNCT__ 2127 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2128 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2129 { 2130 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2131 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2132 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2133 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2134 PetscErrorCode ierr; 2135 PetscInt ridx,cidx,bs2=a->bs2; 2136 PetscBool roworiented=a->roworiented; 2137 MatScalar *ap,value,*aa=a->a,*bap; 2138 2139 PetscFunctionBegin; 2140 for (k=0; k<m; k++) { /* loop over added rows */ 2141 row = im[k]; 2142 brow = row/bs; 2143 if (row < 0) continue; 2144 #if defined(PETSC_USE_DEBUG) 2145 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2146 #endif 2147 rp = aj + ai[brow]; 2148 ap = aa + bs2*ai[brow]; 2149 rmax = imax[brow]; 2150 nrow = ailen[brow]; 2151 low = 0; 2152 high = nrow; 2153 for (l=0; l<n; l++) { /* loop over added columns */ 2154 if (in[l] < 0) continue; 2155 #if defined(PETSC_USE_DEBUG) 2156 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2157 #endif 2158 col = in[l]; bcol = col/bs; 2159 ridx = row % bs; cidx = col % bs; 2160 if (roworiented) { 2161 value = v[l + k*n]; 2162 } else { 2163 value = v[k + l*m]; 2164 } 2165 if (col <= lastcol) low = 0; else high = nrow; 2166 lastcol = col; 2167 while (high-low > 7) { 2168 t = (low+high)/2; 2169 if (rp[t] > bcol) high = t; 2170 else low = t; 2171 } 2172 for (i=low; i<high; i++) { 2173 if (rp[i] > bcol) break; 2174 if (rp[i] == bcol) { 2175 bap = ap + bs2*i + bs*cidx + ridx; 2176 if (is == ADD_VALUES) *bap += value; 2177 else *bap = value; 2178 goto noinsert1; 2179 } 2180 } 2181 if (nonew == 1) goto noinsert1; 2182 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2183 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2184 N = nrow++ - 1; high++; 2185 /* shift up all the later entries in this row */ 2186 for (ii=N; ii>=i; ii--) { 2187 rp[ii+1] = rp[ii]; 2188 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2189 } 2190 if (N>=i) { 2191 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2192 } 2193 rp[i] = bcol; 2194 ap[bs2*i + bs*cidx + ridx] = value; 2195 a->nz++; 2196 A->nonzerostate++; 2197 noinsert1:; 2198 low = i; 2199 } 2200 ailen[brow] = nrow; 2201 } 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2207 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2208 { 2209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2210 Mat outA; 2211 PetscErrorCode ierr; 2212 PetscBool row_identity,col_identity; 2213 2214 PetscFunctionBegin; 2215 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2216 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2217 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2218 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2219 2220 outA = inA; 2221 inA->factortype = MAT_FACTOR_LU; 2222 2223 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2224 2225 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2226 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2227 a->row = row; 2228 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2229 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2230 a->col = col; 2231 2232 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2233 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2234 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2235 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2236 2237 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2238 if (!a->solve_work) { 2239 ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr); 2240 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2241 } 2242 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2243 PetscFunctionReturn(0); 2244 } 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2248 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2249 { 2250 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2251 PetscInt i,nz,mbs; 2252 2253 PetscFunctionBegin; 2254 nz = baij->maxnz; 2255 mbs = baij->mbs; 2256 for (i=0; i<nz; i++) { 2257 baij->j[i] = indices[i]; 2258 } 2259 baij->nz = nz; 2260 for (i=0; i<mbs; i++) { 2261 baij->ilen[i] = baij->imax[i]; 2262 } 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2268 /*@ 2269 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2270 in the matrix. 2271 2272 Input Parameters: 2273 + mat - the SeqBAIJ matrix 2274 - indices - the column indices 2275 2276 Level: advanced 2277 2278 Notes: 2279 This can be called if you have precomputed the nonzero structure of the 2280 matrix and want to provide it to the matrix object to improve the performance 2281 of the MatSetValues() operation. 2282 2283 You MUST have set the correct numbers of nonzeros per row in the call to 2284 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2285 2286 MUST be called before any calls to MatSetValues(); 2287 2288 @*/ 2289 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2290 { 2291 PetscErrorCode ierr; 2292 2293 PetscFunctionBegin; 2294 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2295 PetscValidPointer(indices,2); 2296 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2302 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2303 { 2304 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2305 PetscErrorCode ierr; 2306 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2307 PetscReal atmp; 2308 PetscScalar *x,zero = 0.0; 2309 MatScalar *aa; 2310 PetscInt ncols,brow,krow,kcol; 2311 2312 PetscFunctionBegin; 2313 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2314 bs = A->rmap->bs; 2315 aa = a->a; 2316 ai = a->i; 2317 aj = a->j; 2318 mbs = a->mbs; 2319 2320 ierr = VecSet(v,zero);CHKERRQ(ierr); 2321 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2322 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2323 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2324 for (i=0; i<mbs; i++) { 2325 ncols = ai[1] - ai[0]; ai++; 2326 brow = bs*i; 2327 for (j=0; j<ncols; j++) { 2328 for (kcol=0; kcol<bs; kcol++) { 2329 for (krow=0; krow<bs; krow++) { 2330 atmp = PetscAbsScalar(*aa);aa++; 2331 row = brow + krow; /* row index */ 2332 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2333 } 2334 } 2335 aj++; 2336 } 2337 } 2338 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } 2341 2342 #undef __FUNCT__ 2343 #define __FUNCT__ "MatCopy_SeqBAIJ" 2344 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2345 { 2346 PetscErrorCode ierr; 2347 2348 PetscFunctionBegin; 2349 /* If the two matrices have the same copy implementation, use fast copy. */ 2350 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2351 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2352 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2353 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2354 2355 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2356 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2357 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2358 } else { 2359 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2360 } 2361 PetscFunctionReturn(0); 2362 } 2363 2364 #undef __FUNCT__ 2365 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2366 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2367 { 2368 PetscErrorCode ierr; 2369 2370 PetscFunctionBegin; 2371 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2372 PetscFunctionReturn(0); 2373 } 2374 2375 #undef __FUNCT__ 2376 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2377 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2378 { 2379 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2380 2381 PetscFunctionBegin; 2382 *array = a->a; 2383 PetscFunctionReturn(0); 2384 } 2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2388 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2389 { 2390 PetscFunctionBegin; 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2396 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2397 { 2398 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2399 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2400 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 /* Set the number of nonzeros in the new matrix */ 2405 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2411 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2412 { 2413 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2414 PetscErrorCode ierr; 2415 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2416 PetscBLASInt one=1; 2417 2418 PetscFunctionBegin; 2419 if (str == SAME_NONZERO_PATTERN) { 2420 PetscScalar alpha = a; 2421 PetscBLASInt bnz; 2422 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2423 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2424 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2425 } else { 2426 Mat B; 2427 PetscInt *nnz; 2428 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2429 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2430 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2431 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2432 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2433 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2434 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2435 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2436 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2437 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2438 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2439 ierr = PetscFree(nnz);CHKERRQ(ierr); 2440 } 2441 PetscFunctionReturn(0); 2442 } 2443 2444 #undef __FUNCT__ 2445 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2446 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2447 { 2448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2449 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2450 MatScalar *aa = a->a; 2451 2452 PetscFunctionBegin; 2453 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2459 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2460 { 2461 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2462 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2463 MatScalar *aa = a->a; 2464 2465 PetscFunctionBegin; 2466 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2472 /* 2473 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2474 */ 2475 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2476 { 2477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2478 PetscErrorCode ierr; 2479 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2480 PetscInt nz = a->i[m],row,*jj,mr,col; 2481 2482 PetscFunctionBegin; 2483 *nn = n; 2484 if (!ia) PetscFunctionReturn(0); 2485 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2486 else { 2487 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2488 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2489 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2490 jj = a->j; 2491 for (i=0; i<nz; i++) { 2492 collengths[jj[i]]++; 2493 } 2494 cia[0] = oshift; 2495 for (i=0; i<n; i++) { 2496 cia[i+1] = cia[i] + collengths[i]; 2497 } 2498 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2499 jj = a->j; 2500 for (row=0; row<m; row++) { 2501 mr = a->i[row+1] - a->i[row]; 2502 for (i=0; i<mr; i++) { 2503 col = *jj++; 2504 2505 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2506 } 2507 } 2508 ierr = PetscFree(collengths);CHKERRQ(ierr); 2509 *ia = cia; *ja = cja; 2510 } 2511 PetscFunctionReturn(0); 2512 } 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2516 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2517 { 2518 PetscErrorCode ierr; 2519 2520 PetscFunctionBegin; 2521 if (!ia) PetscFunctionReturn(0); 2522 ierr = PetscFree(*ia);CHKERRQ(ierr); 2523 ierr = PetscFree(*ja);CHKERRQ(ierr); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 /* 2528 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2529 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2530 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2531 */ 2532 #undef __FUNCT__ 2533 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2534 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2535 { 2536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2537 PetscErrorCode ierr; 2538 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2539 PetscInt nz = a->i[m],row,*jj,mr,col; 2540 PetscInt *cspidx; 2541 2542 PetscFunctionBegin; 2543 *nn = n; 2544 if (!ia) PetscFunctionReturn(0); 2545 2546 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2547 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2548 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2549 ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr); 2550 jj = a->j; 2551 for (i=0; i<nz; i++) { 2552 collengths[jj[i]]++; 2553 } 2554 cia[0] = oshift; 2555 for (i=0; i<n; i++) { 2556 cia[i+1] = cia[i] + collengths[i]; 2557 } 2558 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2559 jj = a->j; 2560 for (row=0; row<m; row++) { 2561 mr = a->i[row+1] - a->i[row]; 2562 for (i=0; i<mr; i++) { 2563 col = *jj++; 2564 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2565 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2566 } 2567 } 2568 ierr = PetscFree(collengths);CHKERRQ(ierr); 2569 *ia = cia; *ja = cja; 2570 *spidx = cspidx; 2571 PetscFunctionReturn(0); 2572 } 2573 2574 #undef __FUNCT__ 2575 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2576 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2577 { 2578 PetscErrorCode ierr; 2579 2580 PetscFunctionBegin; 2581 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2582 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 /* -------------------------------------------------------------------*/ 2587 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2588 MatGetRow_SeqBAIJ, 2589 MatRestoreRow_SeqBAIJ, 2590 MatMult_SeqBAIJ_N, 2591 /* 4*/ MatMultAdd_SeqBAIJ_N, 2592 MatMultTranspose_SeqBAIJ, 2593 MatMultTransposeAdd_SeqBAIJ, 2594 0, 2595 0, 2596 0, 2597 /* 10*/ 0, 2598 MatLUFactor_SeqBAIJ, 2599 0, 2600 0, 2601 MatTranspose_SeqBAIJ, 2602 /* 15*/ MatGetInfo_SeqBAIJ, 2603 MatEqual_SeqBAIJ, 2604 MatGetDiagonal_SeqBAIJ, 2605 MatDiagonalScale_SeqBAIJ, 2606 MatNorm_SeqBAIJ, 2607 /* 20*/ 0, 2608 MatAssemblyEnd_SeqBAIJ, 2609 MatSetOption_SeqBAIJ, 2610 MatZeroEntries_SeqBAIJ, 2611 /* 24*/ MatZeroRows_SeqBAIJ, 2612 0, 2613 0, 2614 0, 2615 0, 2616 /* 29*/ MatSetUp_SeqBAIJ, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /* 34*/ MatDuplicate_SeqBAIJ, 2622 0, 2623 0, 2624 MatILUFactor_SeqBAIJ, 2625 0, 2626 /* 39*/ MatAXPY_SeqBAIJ, 2627 MatGetSubMatrices_SeqBAIJ, 2628 MatIncreaseOverlap_SeqBAIJ, 2629 MatGetValues_SeqBAIJ, 2630 MatCopy_SeqBAIJ, 2631 /* 44*/ 0, 2632 MatScale_SeqBAIJ, 2633 0, 2634 0, 2635 MatZeroRowsColumns_SeqBAIJ, 2636 /* 49*/ 0, 2637 MatGetRowIJ_SeqBAIJ, 2638 MatRestoreRowIJ_SeqBAIJ, 2639 MatGetColumnIJ_SeqBAIJ, 2640 MatRestoreColumnIJ_SeqBAIJ, 2641 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2642 0, 2643 0, 2644 0, 2645 MatSetValuesBlocked_SeqBAIJ, 2646 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2647 MatDestroy_SeqBAIJ, 2648 MatView_SeqBAIJ, 2649 0, 2650 0, 2651 /* 64*/ 0, 2652 0, 2653 0, 2654 0, 2655 0, 2656 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2657 0, 2658 MatConvert_Basic, 2659 0, 2660 0, 2661 /* 74*/ 0, 2662 MatFDColoringApply_BAIJ, 2663 0, 2664 0, 2665 0, 2666 /* 79*/ 0, 2667 0, 2668 0, 2669 0, 2670 MatLoad_SeqBAIJ, 2671 /* 84*/ 0, 2672 0, 2673 0, 2674 0, 2675 0, 2676 /* 89*/ 0, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /* 94*/ 0, 2682 0, 2683 0, 2684 0, 2685 0, 2686 /* 99*/ 0, 2687 0, 2688 0, 2689 0, 2690 0, 2691 /*104*/ 0, 2692 MatRealPart_SeqBAIJ, 2693 MatImaginaryPart_SeqBAIJ, 2694 0, 2695 0, 2696 /*109*/ 0, 2697 0, 2698 0, 2699 0, 2700 MatMissingDiagonal_SeqBAIJ, 2701 /*114*/ 0, 2702 0, 2703 0, 2704 0, 2705 0, 2706 /*119*/ 0, 2707 0, 2708 MatMultHermitianTranspose_SeqBAIJ, 2709 MatMultHermitianTransposeAdd_SeqBAIJ, 2710 0, 2711 /*124*/ 0, 2712 0, 2713 MatInvertBlockDiagonal_SeqBAIJ, 2714 0, 2715 0, 2716 /*129*/ 0, 2717 0, 2718 0, 2719 0, 2720 0, 2721 /*134*/ 0, 2722 0, 2723 0, 2724 0, 2725 0, 2726 /*139*/ 0, 2727 0, 2728 0, 2729 MatFDColoringSetUp_SeqXAIJ 2730 }; 2731 2732 #undef __FUNCT__ 2733 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2734 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2735 { 2736 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2737 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2742 2743 /* allocate space for values if not already there */ 2744 if (!aij->saved_values) { 2745 ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr); 2746 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2747 } 2748 2749 /* copy values over */ 2750 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2751 PetscFunctionReturn(0); 2752 } 2753 2754 #undef __FUNCT__ 2755 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2756 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2757 { 2758 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2759 PetscErrorCode ierr; 2760 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2761 2762 PetscFunctionBegin; 2763 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2764 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2765 2766 /* copy values over */ 2767 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2768 PetscFunctionReturn(0); 2769 } 2770 2771 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2772 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2773 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2776 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2777 { 2778 Mat_SeqBAIJ *b; 2779 PetscErrorCode ierr; 2780 PetscInt i,mbs,nbs,bs2; 2781 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2782 2783 PetscFunctionBegin; 2784 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2785 if (nz == MAT_SKIP_ALLOCATION) { 2786 skipallocation = PETSC_TRUE; 2787 nz = 0; 2788 } 2789 2790 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2791 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2792 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2793 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2794 2795 B->preallocated = PETSC_TRUE; 2796 2797 mbs = B->rmap->n/bs; 2798 nbs = B->cmap->n/bs; 2799 bs2 = bs*bs; 2800 2801 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2802 2803 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2804 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2805 if (nnz) { 2806 for (i=0; i<mbs; i++) { 2807 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2808 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2809 } 2810 } 2811 2812 b = (Mat_SeqBAIJ*)B->data; 2813 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2814 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2815 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2816 2817 if (!flg) { 2818 switch (bs) { 2819 case 1: 2820 B->ops->mult = MatMult_SeqBAIJ_1; 2821 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2822 break; 2823 case 2: 2824 B->ops->mult = MatMult_SeqBAIJ_2; 2825 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2826 break; 2827 case 3: 2828 B->ops->mult = MatMult_SeqBAIJ_3; 2829 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2830 break; 2831 case 4: 2832 B->ops->mult = MatMult_SeqBAIJ_4; 2833 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2834 break; 2835 case 5: 2836 B->ops->mult = MatMult_SeqBAIJ_5; 2837 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2838 break; 2839 case 6: 2840 B->ops->mult = MatMult_SeqBAIJ_6; 2841 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2842 break; 2843 case 7: 2844 B->ops->mult = MatMult_SeqBAIJ_7; 2845 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2846 break; 2847 case 15: 2848 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2850 break; 2851 default: 2852 B->ops->mult = MatMult_SeqBAIJ_N; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2854 break; 2855 } 2856 } 2857 B->ops->sor = MatSOR_SeqBAIJ; 2858 b->mbs = mbs; 2859 b->nbs = nbs; 2860 if (!skipallocation) { 2861 if (!b->imax) { 2862 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2863 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2864 2865 b->free_imax_ilen = PETSC_TRUE; 2866 } 2867 /* b->ilen will count nonzeros in each block row so far. */ 2868 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2869 if (!nnz) { 2870 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2871 else if (nz < 0) nz = 1; 2872 for (i=0; i<mbs; i++) b->imax[i] = nz; 2873 nz = nz*mbs; 2874 } else { 2875 nz = 0; 2876 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2877 } 2878 2879 /* allocate the matrix space */ 2880 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2881 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2882 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2883 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2884 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2885 2886 b->singlemalloc = PETSC_TRUE; 2887 b->i[0] = 0; 2888 for (i=1; i<mbs+1; i++) { 2889 b->i[i] = b->i[i-1] + b->imax[i-1]; 2890 } 2891 b->free_a = PETSC_TRUE; 2892 b->free_ij = PETSC_TRUE; 2893 #if defined(PETSC_THREADCOMM_ACTIVE) 2894 ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr); 2895 #endif 2896 } else { 2897 b->free_a = PETSC_FALSE; 2898 b->free_ij = PETSC_FALSE; 2899 } 2900 2901 b->bs2 = bs2; 2902 b->mbs = mbs; 2903 b->nz = 0; 2904 b->maxnz = nz; 2905 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2906 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2907 PetscFunctionReturn(0); 2908 } 2909 2910 #undef __FUNCT__ 2911 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2912 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2913 { 2914 PetscInt i,m,nz,nz_max=0,*nnz; 2915 PetscScalar *values=0; 2916 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2917 PetscErrorCode ierr; 2918 2919 PetscFunctionBegin; 2920 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2921 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2922 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2923 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2924 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2925 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2926 m = B->rmap->n/bs; 2927 2928 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2929 ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr); 2930 for (i=0; i<m; i++) { 2931 nz = ii[i+1]- ii[i]; 2932 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2933 nz_max = PetscMax(nz_max, nz); 2934 nnz[i] = nz; 2935 } 2936 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2937 ierr = PetscFree(nnz);CHKERRQ(ierr); 2938 2939 values = (PetscScalar*)V; 2940 if (!values) { 2941 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2942 } 2943 for (i=0; i<m; i++) { 2944 PetscInt ncols = ii[i+1] - ii[i]; 2945 const PetscInt *icols = jj + ii[i]; 2946 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2947 if (!roworiented) { 2948 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2949 } else { 2950 PetscInt j; 2951 for (j=0; j<ncols; j++) { 2952 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2953 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2954 } 2955 } 2956 } 2957 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2958 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2965 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 2966 #if defined(PETSC_HAVE_MUMPS) 2967 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2968 #endif 2969 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 2970 2971 /*MC 2972 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2973 block sparse compressed row format. 2974 2975 Options Database Keys: 2976 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2977 2978 Level: beginner 2979 2980 .seealso: MatCreateSeqBAIJ() 2981 M*/ 2982 2983 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 2984 2985 #undef __FUNCT__ 2986 #define __FUNCT__ "MatCreate_SeqBAIJ" 2987 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2988 { 2989 PetscErrorCode ierr; 2990 PetscMPIInt size; 2991 Mat_SeqBAIJ *b; 2992 2993 PetscFunctionBegin; 2994 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2995 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2996 2997 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2998 B->data = (void*)b; 2999 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3000 3001 b->row = 0; 3002 b->col = 0; 3003 b->icol = 0; 3004 b->reallocs = 0; 3005 b->saved_values = 0; 3006 3007 b->roworiented = PETSC_TRUE; 3008 b->nonew = 0; 3009 b->diag = 0; 3010 b->solve_work = 0; 3011 b->mult_work = 0; 3012 B->spptr = 0; 3013 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3014 b->keepnonzeropattern = PETSC_FALSE; 3015 3016 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3017 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3018 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3019 #if defined(PETSC_HAVE_MUMPS) 3020 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3021 #endif 3022 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3023 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3026 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3032 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3033 PetscFunctionReturn(0); 3034 } 3035 3036 #undef __FUNCT__ 3037 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3038 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3039 { 3040 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3041 PetscErrorCode ierr; 3042 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3043 3044 PetscFunctionBegin; 3045 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3046 3047 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3048 c->imax = a->imax; 3049 c->ilen = a->ilen; 3050 c->free_imax_ilen = PETSC_FALSE; 3051 } else { 3052 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3053 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3054 for (i=0; i<mbs; i++) { 3055 c->imax[i] = a->imax[i]; 3056 c->ilen[i] = a->ilen[i]; 3057 } 3058 c->free_imax_ilen = PETSC_TRUE; 3059 } 3060 3061 /* allocate the matrix space */ 3062 if (mallocmatspace) { 3063 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3064 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3065 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3066 3067 c->i = a->i; 3068 c->j = a->j; 3069 c->singlemalloc = PETSC_FALSE; 3070 c->free_a = PETSC_TRUE; 3071 c->free_ij = PETSC_FALSE; 3072 c->parent = A; 3073 C->preallocated = PETSC_TRUE; 3074 C->assembled = PETSC_TRUE; 3075 3076 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3077 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3078 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3079 } else { 3080 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3081 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3082 3083 c->singlemalloc = PETSC_TRUE; 3084 c->free_a = PETSC_TRUE; 3085 c->free_ij = PETSC_TRUE; 3086 3087 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3088 if (mbs > 0) { 3089 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3090 if (cpvalues == MAT_COPY_VALUES) { 3091 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3092 } else { 3093 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3094 } 3095 } 3096 C->preallocated = PETSC_TRUE; 3097 C->assembled = PETSC_TRUE; 3098 } 3099 } 3100 3101 c->roworiented = a->roworiented; 3102 c->nonew = a->nonew; 3103 3104 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3105 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3106 3107 c->bs2 = a->bs2; 3108 c->mbs = a->mbs; 3109 c->nbs = a->nbs; 3110 3111 if (a->diag) { 3112 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3113 c->diag = a->diag; 3114 c->free_diag = PETSC_FALSE; 3115 } else { 3116 ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr); 3117 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3118 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3119 c->free_diag = PETSC_TRUE; 3120 } 3121 } else c->diag = 0; 3122 3123 c->nz = a->nz; 3124 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3125 c->solve_work = 0; 3126 c->mult_work = 0; 3127 3128 c->compressedrow.use = a->compressedrow.use; 3129 c->compressedrow.nrows = a->compressedrow.nrows; 3130 if (a->compressedrow.use) { 3131 i = a->compressedrow.nrows; 3132 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3133 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3134 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3135 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3136 } else { 3137 c->compressedrow.use = PETSC_FALSE; 3138 c->compressedrow.i = NULL; 3139 c->compressedrow.rindex = NULL; 3140 } 3141 C->nonzerostate = A->nonzerostate; 3142 3143 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3144 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3145 PetscFunctionReturn(0); 3146 } 3147 3148 #undef __FUNCT__ 3149 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3150 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3151 { 3152 PetscErrorCode ierr; 3153 3154 PetscFunctionBegin; 3155 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3156 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3157 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3158 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3159 PetscFunctionReturn(0); 3160 } 3161 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatLoad_SeqBAIJ" 3164 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3165 { 3166 Mat_SeqBAIJ *a; 3167 PetscErrorCode ierr; 3168 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3169 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3170 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3171 PetscInt *masked,nmask,tmp,bs2,ishift; 3172 PetscMPIInt size; 3173 int fd; 3174 PetscScalar *aa; 3175 MPI_Comm comm; 3176 3177 PetscFunctionBegin; 3178 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3179 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3180 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3181 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3182 bs2 = bs*bs; 3183 3184 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3185 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3186 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3187 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3188 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3189 M = header[1]; N = header[2]; nz = header[3]; 3190 3191 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3192 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3193 3194 /* 3195 This code adds extra rows to make sure the number of rows is 3196 divisible by the blocksize 3197 */ 3198 mbs = M/bs; 3199 extra_rows = bs - M + bs*(mbs); 3200 if (extra_rows == bs) extra_rows = 0; 3201 else mbs++; 3202 if (extra_rows) { 3203 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3204 } 3205 3206 /* Set global sizes if not already set */ 3207 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3208 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3209 } else { /* Check if the matrix global sizes are correct */ 3210 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3211 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3212 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3213 } 3214 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3215 } 3216 3217 /* read in row lengths */ 3218 ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr); 3219 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3220 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3221 3222 /* read in column indices */ 3223 ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr); 3224 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3225 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3226 3227 /* loop over row lengths determining block row lengths */ 3228 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3229 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3230 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3231 rowcount = 0; 3232 nzcount = 0; 3233 for (i=0; i<mbs; i++) { 3234 nmask = 0; 3235 for (j=0; j<bs; j++) { 3236 kmax = rowlengths[rowcount]; 3237 for (k=0; k<kmax; k++) { 3238 tmp = jj[nzcount++]/bs; 3239 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3240 } 3241 rowcount++; 3242 } 3243 browlengths[i] += nmask; 3244 /* zero out the mask elements we set */ 3245 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3246 } 3247 3248 /* Do preallocation */ 3249 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3250 a = (Mat_SeqBAIJ*)newmat->data; 3251 3252 /* set matrix "i" values */ 3253 a->i[0] = 0; 3254 for (i=1; i<= mbs; i++) { 3255 a->i[i] = a->i[i-1] + browlengths[i-1]; 3256 a->ilen[i-1] = browlengths[i-1]; 3257 } 3258 a->nz = 0; 3259 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3260 3261 /* read in nonzero values */ 3262 ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr); 3263 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3264 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3265 3266 /* set "a" and "j" values into matrix */ 3267 nzcount = 0; jcount = 0; 3268 for (i=0; i<mbs; i++) { 3269 nzcountb = nzcount; 3270 nmask = 0; 3271 for (j=0; j<bs; j++) { 3272 kmax = rowlengths[i*bs+j]; 3273 for (k=0; k<kmax; k++) { 3274 tmp = jj[nzcount++]/bs; 3275 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3276 } 3277 } 3278 /* sort the masked values */ 3279 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3280 3281 /* set "j" values into matrix */ 3282 maskcount = 1; 3283 for (j=0; j<nmask; j++) { 3284 a->j[jcount++] = masked[j]; 3285 mask[masked[j]] = maskcount++; 3286 } 3287 /* set "a" values into matrix */ 3288 ishift = bs2*a->i[i]; 3289 for (j=0; j<bs; j++) { 3290 kmax = rowlengths[i*bs+j]; 3291 for (k=0; k<kmax; k++) { 3292 tmp = jj[nzcountb]/bs; 3293 block = mask[tmp] - 1; 3294 point = jj[nzcountb] - bs*tmp; 3295 idx = ishift + bs2*block + j + bs*point; 3296 a->a[idx] = (MatScalar)aa[nzcountb++]; 3297 } 3298 } 3299 /* zero out the mask elements we set */ 3300 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3301 } 3302 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3303 3304 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3305 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3306 ierr = PetscFree(aa);CHKERRQ(ierr); 3307 ierr = PetscFree(jj);CHKERRQ(ierr); 3308 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3309 3310 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3311 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3312 PetscFunctionReturn(0); 3313 } 3314 3315 #undef __FUNCT__ 3316 #define __FUNCT__ "MatCreateSeqBAIJ" 3317 /*@C 3318 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3319 compressed row) format. For good matrix assembly performance the 3320 user should preallocate the matrix storage by setting the parameter nz 3321 (or the array nnz). By setting these parameters accurately, performance 3322 during matrix assembly can be increased by more than a factor of 50. 3323 3324 Collective on MPI_Comm 3325 3326 Input Parameters: 3327 + comm - MPI communicator, set to PETSC_COMM_SELF 3328 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3329 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3330 . m - number of rows 3331 . n - number of columns 3332 . nz - number of nonzero blocks per block row (same for all rows) 3333 - nnz - array containing the number of nonzero blocks in the various block rows 3334 (possibly different for each block row) or NULL 3335 3336 Output Parameter: 3337 . A - the matrix 3338 3339 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3340 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3341 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3342 3343 Options Database Keys: 3344 . -mat_no_unroll - uses code that does not unroll the loops in the 3345 block calculations (much slower) 3346 . -mat_block_size - size of the blocks to use 3347 3348 Level: intermediate 3349 3350 Notes: 3351 The number of rows and columns must be divisible by blocksize. 3352 3353 If the nnz parameter is given then the nz parameter is ignored 3354 3355 A nonzero block is any block that as 1 or more nonzeros in it 3356 3357 The block AIJ format is fully compatible with standard Fortran 77 3358 storage. That is, the stored row and column indices can begin at 3359 either one (as in Fortran) or zero. See the users' manual for details. 3360 3361 Specify the preallocated storage with either nz or nnz (not both). 3362 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3363 allocation. See Users-Manual: ch_mat for details. 3364 matrices. 3365 3366 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3367 @*/ 3368 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3369 { 3370 PetscErrorCode ierr; 3371 3372 PetscFunctionBegin; 3373 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3374 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3375 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3376 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3377 PetscFunctionReturn(0); 3378 } 3379 3380 #undef __FUNCT__ 3381 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3382 /*@C 3383 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3384 per row in the matrix. For good matrix assembly performance the 3385 user should preallocate the matrix storage by setting the parameter nz 3386 (or the array nnz). By setting these parameters accurately, performance 3387 during matrix assembly can be increased by more than a factor of 50. 3388 3389 Collective on MPI_Comm 3390 3391 Input Parameters: 3392 + B - the matrix 3393 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3394 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3395 . nz - number of block nonzeros per block row (same for all rows) 3396 - nnz - array containing the number of block nonzeros in the various block rows 3397 (possibly different for each block row) or NULL 3398 3399 Options Database Keys: 3400 . -mat_no_unroll - uses code that does not unroll the loops in the 3401 block calculations (much slower) 3402 . -mat_block_size - size of the blocks to use 3403 3404 Level: intermediate 3405 3406 Notes: 3407 If the nnz parameter is given then the nz parameter is ignored 3408 3409 You can call MatGetInfo() to get information on how effective the preallocation was; 3410 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3411 You can also run with the option -info and look for messages with the string 3412 malloc in them to see if additional memory allocation was needed. 3413 3414 The block AIJ format is fully compatible with standard Fortran 77 3415 storage. That is, the stored row and column indices can begin at 3416 either one (as in Fortran) or zero. See the users' manual for details. 3417 3418 Specify the preallocated storage with either nz or nnz (not both). 3419 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3420 allocation. See Users-Manual: ch_mat for details. 3421 3422 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3423 @*/ 3424 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3425 { 3426 PetscErrorCode ierr; 3427 3428 PetscFunctionBegin; 3429 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3430 PetscValidType(B,1); 3431 PetscValidLogicalCollectiveInt(B,bs,2); 3432 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3433 PetscFunctionReturn(0); 3434 } 3435 3436 #undef __FUNCT__ 3437 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3438 /*@C 3439 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3440 (the default sequential PETSc format). 3441 3442 Collective on MPI_Comm 3443 3444 Input Parameters: 3445 + B - the matrix 3446 . i - the indices into j for the start of each local row (starts with zero) 3447 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3448 - v - optional values in the matrix 3449 3450 Level: developer 3451 3452 Notes: 3453 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3454 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3455 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3456 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3457 block column and the second index is over columns within a block. 3458 3459 .keywords: matrix, aij, compressed row, sparse 3460 3461 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3462 @*/ 3463 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3464 { 3465 PetscErrorCode ierr; 3466 3467 PetscFunctionBegin; 3468 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3469 PetscValidType(B,1); 3470 PetscValidLogicalCollectiveInt(B,bs,2); 3471 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3472 PetscFunctionReturn(0); 3473 } 3474 3475 3476 #undef __FUNCT__ 3477 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3478 /*@ 3479 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3480 3481 Collective on MPI_Comm 3482 3483 Input Parameters: 3484 + comm - must be an MPI communicator of size 1 3485 . bs - size of block 3486 . m - number of rows 3487 . n - number of columns 3488 . i - row indices 3489 . j - column indices 3490 - a - matrix values 3491 3492 Output Parameter: 3493 . mat - the matrix 3494 3495 Level: advanced 3496 3497 Notes: 3498 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3499 once the matrix is destroyed 3500 3501 You cannot set new nonzero locations into this matrix, that will generate an error. 3502 3503 The i and j indices are 0 based 3504 3505 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3506 3507 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3508 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3509 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3510 with column-major ordering within blocks. 3511 3512 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3513 3514 @*/ 3515 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3516 { 3517 PetscErrorCode ierr; 3518 PetscInt ii; 3519 Mat_SeqBAIJ *baij; 3520 3521 PetscFunctionBegin; 3522 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3523 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3524 3525 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3526 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3527 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3528 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3529 baij = (Mat_SeqBAIJ*)(*mat)->data; 3530 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3531 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3532 3533 baij->i = i; 3534 baij->j = j; 3535 baij->a = a; 3536 3537 baij->singlemalloc = PETSC_FALSE; 3538 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3539 baij->free_a = PETSC_FALSE; 3540 baij->free_ij = PETSC_FALSE; 3541 3542 for (ii=0; ii<m; ii++) { 3543 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3544 #if defined(PETSC_USE_DEBUG) 3545 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3546 #endif 3547 } 3548 #if defined(PETSC_USE_DEBUG) 3549 for (ii=0; ii<baij->i[m]; ii++) { 3550 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3551 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3552 } 3553 #endif 3554 3555 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3556 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3557 PetscFunctionReturn(0); 3558 } 3559