1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 156 } 157 work = a->mult_work; 158 t = work + k+1; 159 if (!a->sor_work) { 160 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 161 } 162 w = a->sor_work; 163 164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 165 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 166 167 if (flag & SOR_ZERO_INITIAL_GUESS) { 168 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 169 switch (bs) { 170 case 1: 171 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 172 t[0] = b[0]; 173 i2 = 1; 174 idiag += 1; 175 for (i=1; i<m; i++) { 176 v = aa + ai[i]; 177 vi = aj + ai[i]; 178 nz = diag[i] - ai[i]; 179 s[0] = b[i2]; 180 for (j=0; j<nz; j++) { 181 xw[0] = x[vi[j]]; 182 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 183 } 184 t[i2] = s[0]; 185 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 186 x[i2] = xw[0]; 187 idiag += 1; 188 i2 += 1; 189 } 190 break; 191 case 2: 192 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 193 t[0] = b[0]; t[1] = b[1]; 194 i2 = 2; 195 idiag += 4; 196 for (i=1; i<m; i++) { 197 v = aa + 4*ai[i]; 198 vi = aj + ai[i]; 199 nz = diag[i] - ai[i]; 200 s[0] = b[i2]; s[1] = b[i2+1]; 201 for (j=0; j<nz; j++) { 202 idx = 2*vi[j]; 203 it = 4*j; 204 xw[0] = x[idx]; xw[1] = x[1+idx]; 205 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 206 } 207 t[i2] = s[0]; t[i2+1] = s[1]; 208 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 209 x[i2] = xw[0]; x[i2+1] = xw[1]; 210 idiag += 4; 211 i2 += 2; 212 } 213 break; 214 case 3: 215 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 216 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 217 i2 = 3; 218 idiag += 9; 219 for (i=1; i<m; i++) { 220 v = aa + 9*ai[i]; 221 vi = aj + ai[i]; 222 nz = diag[i] - ai[i]; 223 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 224 while (nz--) { 225 idx = 3*(*vi++); 226 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 228 v += 9; 229 } 230 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 231 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 232 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 233 idiag += 9; 234 i2 += 3; 235 } 236 break; 237 case 4: 238 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 239 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 240 i2 = 4; 241 idiag += 16; 242 for (i=1; i<m; i++) { 243 v = aa + 16*ai[i]; 244 vi = aj + ai[i]; 245 nz = diag[i] - ai[i]; 246 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 247 while (nz--) { 248 idx = 4*(*vi++); 249 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 250 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 251 v += 16; 252 } 253 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 254 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 255 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 256 idiag += 16; 257 i2 += 4; 258 } 259 break; 260 case 5: 261 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 262 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 263 i2 = 5; 264 idiag += 25; 265 for (i=1; i<m; i++) { 266 v = aa + 25*ai[i]; 267 vi = aj + ai[i]; 268 nz = diag[i] - ai[i]; 269 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 270 while (nz--) { 271 idx = 5*(*vi++); 272 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 273 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 274 v += 25; 275 } 276 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 277 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 278 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 279 idiag += 25; 280 i2 += 5; 281 } 282 break; 283 case 6: 284 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 285 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 286 i2 = 6; 287 idiag += 36; 288 for (i=1; i<m; i++) { 289 v = aa + 36*ai[i]; 290 vi = aj + ai[i]; 291 nz = diag[i] - ai[i]; 292 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 293 while (nz--) { 294 idx = 6*(*vi++); 295 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 296 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 298 v += 36; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 301 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 302 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 303 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 304 idiag += 36; 305 i2 += 6; 306 } 307 break; 308 case 7: 309 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 310 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 311 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 312 i2 = 7; 313 idiag += 49; 314 for (i=1; i<m; i++) { 315 v = aa + 49*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 319 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 320 while (nz--) { 321 idx = 7*(*vi++); 322 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 323 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 324 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 325 v += 49; 326 } 327 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 328 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 329 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 331 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 332 idiag += 49; 333 i2 += 7; 334 } 335 break; 336 default: 337 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 338 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 339 i2 = bs; 340 idiag += bs2; 341 for (i=1; i<m; i++) { 342 v = aa + bs2*ai[i]; 343 vi = aj + ai[i]; 344 nz = diag[i] - ai[i]; 345 346 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 347 /* copy all rows of x that are needed into contiguous space */ 348 workt = work; 349 for (j=0; j<nz; j++) { 350 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 351 workt += bs; 352 } 353 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 354 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 356 357 idiag += bs2; 358 i2 += bs; 359 } 360 break; 361 } 362 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 363 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 364 xb = t; 365 } 366 else xb = b; 367 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 368 idiag = a->idiag+bs2*(a->mbs-1); 369 i2 = bs * (m-1); 370 switch (bs) { 371 case 1: 372 s[0] = xb[i2]; 373 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 374 x[i2] = xw[0]; 375 i2 -= 1; 376 for (i=m-2; i>=0; i--) { 377 v = aa + (diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s[0] = xb[i2]; 381 for (j=0; j<nz; j++) { 382 xw[0] = x[vi[j]]; 383 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 384 } 385 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 386 x[i2] = xw[0]; 387 idiag -= 1; 388 i2 -= 1; 389 } 390 break; 391 case 2: 392 s[0] = xb[i2]; s[1] = xb[i2+1]; 393 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 394 x[i2] = xw[0]; x[i2+1] = xw[1]; 395 i2 -= 2; 396 idiag -= 4; 397 for (i=m-2; i>=0; i--) { 398 v = aa + 4*(diag[i] + 1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 for (j=0; j<nz; j++) { 403 idx = 2*vi[j]; 404 it = 4*j; 405 xw[0] = x[idx]; xw[1] = x[1+idx]; 406 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 407 } 408 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 409 x[i2] = xw[0]; x[i2+1] = xw[1]; 410 idiag -= 4; 411 i2 -= 2; 412 } 413 break; 414 case 3: 415 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 416 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 417 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 418 i2 -= 3; 419 idiag -= 9; 420 for (i=m-2; i>=0; i--) { 421 v = aa + 9*(diag[i]+1); 422 vi = aj + diag[i] + 1; 423 nz = ai[i+1] - diag[i] - 1; 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 while (nz--) { 426 idx = 3*(*vi++); 427 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 429 v += 9; 430 } 431 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 432 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 433 idiag -= 9; 434 i2 -= 3; 435 } 436 break; 437 case 4: 438 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 439 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 440 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 441 i2 -= 4; 442 idiag -= 16; 443 for (i=m-2; i>=0; i--) { 444 v = aa + 16*(diag[i]+1); 445 vi = aj + diag[i] + 1; 446 nz = ai[i+1] - diag[i] - 1; 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 while (nz--) { 449 idx = 4*(*vi++); 450 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 451 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 452 v += 16; 453 } 454 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 455 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 456 idiag -= 16; 457 i2 -= 4; 458 } 459 break; 460 case 5: 461 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 462 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 463 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 464 i2 -= 5; 465 idiag -= 25; 466 for (i=m-2; i>=0; i--) { 467 v = aa + 25*(diag[i]+1); 468 vi = aj + diag[i] + 1; 469 nz = ai[i+1] - diag[i] - 1; 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 while (nz--) { 472 idx = 5*(*vi++); 473 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 474 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 475 v += 25; 476 } 477 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 478 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 479 idiag -= 25; 480 i2 -= 5; 481 } 482 break; 483 case 6: 484 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 485 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 486 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 487 i2 -= 6; 488 idiag -= 36; 489 for (i=m-2; i>=0; i--) { 490 v = aa + 36*(diag[i]+1); 491 vi = aj + diag[i] + 1; 492 nz = ai[i+1] - diag[i] - 1; 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 while (nz--) { 495 idx = 6*(*vi++); 496 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 497 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 499 v += 36; 500 } 501 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 503 idiag -= 36; 504 i2 -= 6; 505 } 506 break; 507 case 7: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 509 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 510 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 512 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 513 i2 -= 7; 514 idiag -= 49; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 49*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 520 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 521 while (nz--) { 522 idx = 7*(*vi++); 523 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 524 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 525 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 526 v += 49; 527 } 528 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 529 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 530 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 531 idiag -= 49; 532 i2 -= 7; 533 } 534 break; 535 default: 536 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 537 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 538 i2 -= bs; 539 idiag -= bs2; 540 for (i=m-2; i>=0; i--) { 541 v = aa + bs2*(diag[i]+1); 542 vi = aj + diag[i] + 1; 543 nz = ai[i+1] - diag[i] - 1; 544 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 /* copy all rows of x that are needed into contiguous space */ 547 workt = work; 548 for (j=0; j<nz; j++) { 549 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 workt += bs; 551 } 552 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 553 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 554 555 idiag -= bs2; 556 i2 -= bs; 557 } 558 break; 559 } 560 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 561 } 562 its--; 563 } 564 while (its--) { 565 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 566 idiag = a->idiag; 567 i2 = 0; 568 switch (bs) { 569 case 1: 570 for (i=0; i<m; i++) { 571 v = aa + ai[i]; 572 vi = aj + ai[i]; 573 nz = ai[i+1] - ai[i]; 574 s[0] = b[i2]; 575 for (j=0; j<nz; j++) { 576 xw[0] = x[vi[j]]; 577 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 578 } 579 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 580 x[i2] += xw[0]; 581 idiag += 1; 582 i2 += 1; 583 } 584 break; 585 case 2: 586 for (i=0; i<m; i++) { 587 v = aa + 4*ai[i]; 588 vi = aj + ai[i]; 589 nz = ai[i+1] - ai[i]; 590 s[0] = b[i2]; s[1] = b[i2+1]; 591 for (j=0; j<nz; j++) { 592 idx = 2*vi[j]; 593 it = 4*j; 594 xw[0] = x[idx]; xw[1] = x[1+idx]; 595 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 596 } 597 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 598 x[i2] += xw[0]; x[i2+1] += xw[1]; 599 idiag += 4; 600 i2 += 2; 601 } 602 break; 603 case 3: 604 for (i=0; i<m; i++) { 605 v = aa + 9*ai[i]; 606 vi = aj + ai[i]; 607 nz = ai[i+1] - ai[i]; 608 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 609 while (nz--) { 610 idx = 3*(*vi++); 611 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 612 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 613 v += 9; 614 } 615 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 616 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 617 idiag += 9; 618 i2 += 3; 619 } 620 break; 621 case 4: 622 for (i=0; i<m; i++) { 623 v = aa + 16*ai[i]; 624 vi = aj + ai[i]; 625 nz = ai[i+1] - ai[i]; 626 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 627 while (nz--) { 628 idx = 4*(*vi++); 629 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 630 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 631 v += 16; 632 } 633 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 634 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 635 idiag += 16; 636 i2 += 4; 637 } 638 break; 639 case 5: 640 for (i=0; i<m; i++) { 641 v = aa + 25*ai[i]; 642 vi = aj + ai[i]; 643 nz = ai[i+1] - ai[i]; 644 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 645 while (nz--) { 646 idx = 5*(*vi++); 647 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 648 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 649 v += 25; 650 } 651 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 652 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 653 idiag += 25; 654 i2 += 5; 655 } 656 break; 657 case 6: 658 for (i=0; i<m; i++) { 659 v = aa + 36*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 663 while (nz--) { 664 idx = 6*(*vi++); 665 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 666 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 668 v += 36; 669 } 670 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 673 idiag += 36; 674 i2 += 6; 675 } 676 break; 677 case 7: 678 for (i=0; i<m; i++) { 679 v = aa + 49*ai[i]; 680 vi = aj + ai[i]; 681 nz = ai[i+1] - ai[i]; 682 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 683 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 684 while (nz--) { 685 idx = 7*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 689 v += 49; 690 } 691 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 692 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 693 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 694 idiag += 49; 695 i2 += 7; 696 } 697 break; 698 default: 699 for (i=0; i<m; i++) { 700 v = aa + bs2*ai[i]; 701 vi = aj + ai[i]; 702 nz = ai[i+1] - ai[i]; 703 704 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 705 /* copy all rows of x that are needed into contiguous space */ 706 workt = work; 707 for (j=0; j<nz; j++) { 708 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 709 workt += bs; 710 } 711 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 712 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 713 714 idiag += bs2; 715 i2 += bs; 716 } 717 break; 718 } 719 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 720 } 721 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 722 idiag = a->idiag+bs2*(a->mbs-1); 723 i2 = bs * (m-1); 724 switch (bs) { 725 case 1: 726 for (i=m-1; i>=0; i--) { 727 v = aa + ai[i]; 728 vi = aj + ai[i]; 729 nz = ai[i+1] - ai[i]; 730 s[0] = b[i2]; 731 for (j=0; j<nz; j++) { 732 xw[0] = x[vi[j]]; 733 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 734 } 735 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 736 x[i2] += xw[0]; 737 idiag -= 1; 738 i2 -= 1; 739 } 740 break; 741 case 2: 742 for (i=m-1; i>=0; i--) { 743 v = aa + 4*ai[i]; 744 vi = aj + ai[i]; 745 nz = ai[i+1] - ai[i]; 746 s[0] = b[i2]; s[1] = b[i2+1]; 747 for (j=0; j<nz; j++) { 748 idx = 2*vi[j]; 749 it = 4*j; 750 xw[0] = x[idx]; xw[1] = x[1+idx]; 751 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 752 } 753 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 754 x[i2] += xw[0]; x[i2+1] += xw[1]; 755 idiag -= 4; 756 i2 -= 2; 757 } 758 break; 759 case 3: 760 for (i=m-1; i>=0; i--) { 761 v = aa + 9*ai[i]; 762 vi = aj + ai[i]; 763 nz = ai[i+1] - ai[i]; 764 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 765 while (nz--) { 766 idx = 3*(*vi++); 767 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 768 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 769 v += 9; 770 } 771 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 772 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 773 idiag -= 9; 774 i2 -= 3; 775 } 776 break; 777 case 4: 778 for (i=m-1; i>=0; i--) { 779 v = aa + 16*ai[i]; 780 vi = aj + ai[i]; 781 nz = ai[i+1] - ai[i]; 782 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 783 while (nz--) { 784 idx = 4*(*vi++); 785 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 786 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 787 v += 16; 788 } 789 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 790 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 791 idiag -= 16; 792 i2 -= 4; 793 } 794 break; 795 case 5: 796 for (i=m-1; i>=0; i--) { 797 v = aa + 25*ai[i]; 798 vi = aj + ai[i]; 799 nz = ai[i+1] - ai[i]; 800 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 801 while (nz--) { 802 idx = 5*(*vi++); 803 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 804 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 805 v += 25; 806 } 807 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 808 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 809 idiag -= 25; 810 i2 -= 5; 811 } 812 break; 813 case 6: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 36*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 819 while (nz--) { 820 idx = 6*(*vi++); 821 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 822 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 824 v += 36; 825 } 826 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 829 idiag -= 36; 830 i2 -= 6; 831 } 832 break; 833 case 7: 834 for (i=m-1; i>=0; i--) { 835 v = aa + 49*ai[i]; 836 vi = aj + ai[i]; 837 nz = ai[i+1] - ai[i]; 838 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 839 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 844 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 845 v += 49; 846 } 847 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 848 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 849 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 850 idiag -= 49; 851 i2 -= 7; 852 } 853 break; 854 default: 855 for (i=m-1; i>=0; i--) { 856 v = aa + bs2*ai[i]; 857 vi = aj + ai[i]; 858 nz = ai[i+1] - ai[i]; 859 860 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 861 /* copy all rows of x that are needed into contiguous space */ 862 workt = work; 863 for (j=0; j<nz; j++) { 864 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 workt += bs; 866 } 867 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 868 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 869 870 idiag -= bs2; 871 i2 -= bs; 872 } 873 break; 874 } 875 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 876 } 877 } 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 884 /* 885 Special version for direct calls from Fortran (Used in PETSc-fun3d) 886 */ 887 #if defined(PETSC_HAVE_FORTRAN_CAPS) 888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 890 #define matsetvaluesblocked4_ matsetvaluesblocked4 891 #endif 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "matsetvaluesblocked4_" 895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 896 { 897 Mat A = *AA; 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 899 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 900 PetscInt *ai =a->i,*ailen=a->ilen; 901 PetscInt *aj =a->j,stepval,lastcol = -1; 902 const PetscScalar *value = v; 903 MatScalar *ap,*aa = a->a,*bap; 904 905 PetscFunctionBegin; 906 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 907 stepval = (n-1)*4; 908 for (k=0; k<m; k++) { /* loop over added rows */ 909 row = im[k]; 910 rp = aj + ai[row]; 911 ap = aa + 16*ai[row]; 912 nrow = ailen[row]; 913 low = 0; 914 high = nrow; 915 for (l=0; l<n; l++) { /* loop over added columns */ 916 col = in[l]; 917 if (col <= lastcol) low = 0; 918 else high = nrow; 919 lastcol = col; 920 value = v + k*(stepval+4 + l)*4; 921 while (high-low > 7) { 922 t = (low+high)/2; 923 if (rp[t] > col) high = t; 924 else low = t; 925 } 926 for (i=low; i<high; i++) { 927 if (rp[i] > col) break; 928 if (rp[i] == col) { 929 bap = ap + 16*i; 930 for (ii=0; ii<4; ii++,value+=stepval) { 931 for (jj=ii; jj<16; jj+=4) { 932 bap[jj] += *value++; 933 } 934 } 935 goto noinsert2; 936 } 937 } 938 N = nrow++ - 1; 939 high++; /* added new column index thus must search to one higher than before */ 940 /* shift up all the later entries in this row */ 941 for (ii=N; ii>=i; ii--) { 942 rp[ii+1] = rp[ii]; 943 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 944 } 945 if (N >= i) { 946 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 947 } 948 rp[i] = col; 949 bap = ap + 16*i; 950 for (ii=0; ii<4; ii++,value+=stepval) { 951 for (jj=ii; jj<16; jj+=4) { 952 bap[jj] = *value++; 953 } 954 } 955 noinsert2:; 956 low = i; 957 } 958 ailen[row] = nrow; 959 } 960 PetscFunctionReturnVoid(); 961 } 962 963 #if defined(PETSC_HAVE_FORTRAN_CAPS) 964 #define matsetvalues4_ MATSETVALUES4 965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 966 #define matsetvalues4_ matsetvalues4 967 #endif 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSetValues4_" 971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 972 { 973 Mat A = *AA; 974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 975 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 976 PetscInt *ai=a->i,*ailen=a->ilen; 977 PetscInt *aj=a->j,brow,bcol; 978 PetscInt ridx,cidx,lastcol = -1; 979 MatScalar *ap,value,*aa=a->a,*bap; 980 981 PetscFunctionBegin; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; brow = row/4; 984 rp = aj + ai[brow]; 985 ap = aa + 16*ai[brow]; 986 nrow = ailen[brow]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; bcol = col/4; 991 ridx = row % 4; cidx = col % 4; 992 value = v[l + k*n]; 993 if (col <= lastcol) low = 0; 994 else high = nrow; 995 lastcol = col; 996 while (high-low > 7) { 997 t = (low+high)/2; 998 if (rp[t] > bcol) high = t; 999 else low = t; 1000 } 1001 for (i=low; i<high; i++) { 1002 if (rp[i] > bcol) break; 1003 if (rp[i] == bcol) { 1004 bap = ap + 16*i + 4*cidx + ridx; 1005 *bap += value; 1006 goto noinsert1; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N>=i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = bcol; 1020 ap[16*i + 4*cidx + ridx] = value; 1021 noinsert1:; 1022 low = i; 1023 } 1024 ailen[brow] = nrow; 1025 } 1026 PetscFunctionReturnVoid(); 1027 } 1028 1029 /* 1030 Checks for missing diagonals 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*jj = a->j,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !jj) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1047 } else { 1048 diag = a->diag; 1049 for (i=0; i<a->mbs; i++) { 1050 if (jj[diag[i]] != i) { 1051 *missing = PETSC_TRUE; 1052 if (d) *d = i; 1053 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1054 break; 1055 } 1056 } 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = 0; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1176 { 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (!ia) PetscFunctionReturn(0); 1181 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1182 ierr = PetscFree(*ia);CHKERRQ(ierr); 1183 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1191 { 1192 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1198 #endif 1199 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1200 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1201 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1202 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1203 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1204 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1205 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1206 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1207 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1208 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1209 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1210 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1211 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1212 1213 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1214 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1215 ierr = PetscFree(A->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1234 { 1235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 switch (op) { 1240 case MAT_ROW_ORIENTED: 1241 a->roworiented = flg; 1242 break; 1243 case MAT_KEEP_NONZERO_PATTERN: 1244 a->keepnonzeropattern = flg; 1245 break; 1246 case MAT_NEW_NONZERO_LOCATIONS: 1247 a->nonew = (flg ? 0 : 1); 1248 break; 1249 case MAT_NEW_NONZERO_LOCATION_ERR: 1250 a->nonew = (flg ? -1 : 0); 1251 break; 1252 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1253 a->nonew = (flg ? -2 : 0); 1254 break; 1255 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1256 a->nounused = (flg ? -1 : 0); 1257 break; 1258 case MAT_CHECK_COMPRESSED_ROW: 1259 a->compressedrow.check = flg; 1260 break; 1261 case MAT_NEW_DIAGONALS: 1262 case MAT_IGNORE_OFF_PROC_ENTRIES: 1263 case MAT_USE_HASH_TABLE: 1264 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1265 break; 1266 case MAT_SPD: 1267 case MAT_SYMMETRIC: 1268 case MAT_STRUCTURALLY_SYMMETRIC: 1269 case MAT_HERMITIAN: 1270 case MAT_SYMMETRY_ETERNAL: 1271 /* These options are handled directly by MatSetOption() */ 1272 break; 1273 default: 1274 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1281 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1282 { 1283 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1284 PetscErrorCode ierr; 1285 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1286 MatScalar *aa,*aa_i; 1287 PetscScalar *v_i; 1288 1289 PetscFunctionBegin; 1290 bs = A->rmap->bs; 1291 ai = a->i; 1292 aj = a->j; 1293 aa = a->a; 1294 bs2 = a->bs2; 1295 1296 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1297 1298 bn = row/bs; /* Block number */ 1299 bp = row % bs; /* Block Position */ 1300 M = ai[bn+1] - ai[bn]; 1301 *nz = bs*M; 1302 1303 if (v) { 1304 *v = 0; 1305 if (*nz) { 1306 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1307 for (i=0; i<M; i++) { /* for each block in the block row */ 1308 v_i = *v + i*bs; 1309 aa_i = aa + bs2*(ai[bn] + i); 1310 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1311 } 1312 } 1313 } 1314 1315 if (idx) { 1316 *idx = 0; 1317 if (*nz) { 1318 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1319 for (i=0; i<M; i++) { /* for each block in the block row */ 1320 idx_i = *idx + i*bs; 1321 itmp = bs*aj[ai[bn] + i]; 1322 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1323 } 1324 } 1325 } 1326 PetscFunctionReturn(0); 1327 } 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1331 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1332 { 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1337 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1338 PetscFunctionReturn(0); 1339 } 1340 1341 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1345 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1346 { 1347 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1348 Mat C; 1349 PetscErrorCode ierr; 1350 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1351 PetscInt *rows,*cols,bs2=a->bs2; 1352 MatScalar *array; 1353 1354 PetscFunctionBegin; 1355 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1356 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1357 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1358 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1359 1360 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1361 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1362 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1363 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1364 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1365 ierr = PetscFree(col);CHKERRQ(ierr); 1366 } else { 1367 C = *B; 1368 } 1369 1370 array = a->a; 1371 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1372 for (i=0; i<mbs; i++) { 1373 cols[0] = i*bs; 1374 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1375 len = ai[i+1] - ai[i]; 1376 for (j=0; j<len; j++) { 1377 rows[0] = (*aj++)*bs; 1378 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1379 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1380 array += bs2; 1381 } 1382 } 1383 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1384 1385 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1386 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1387 1388 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1389 *B = C; 1390 } else { 1391 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1392 } 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1398 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1399 { 1400 PetscErrorCode ierr; 1401 Mat Btrans; 1402 1403 PetscFunctionBegin; 1404 *f = PETSC_FALSE; 1405 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1406 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1407 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1413 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1414 { 1415 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1418 int fd; 1419 PetscScalar *aa; 1420 FILE *file; 1421 1422 PetscFunctionBegin; 1423 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1424 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1425 col_lens[0] = MAT_FILE_CLASSID; 1426 1427 col_lens[1] = A->rmap->N; 1428 col_lens[2] = A->cmap->n; 1429 col_lens[3] = a->nz*bs2; 1430 1431 /* store lengths of each row and write (including header) to file */ 1432 count = 0; 1433 for (i=0; i<a->mbs; i++) { 1434 for (j=0; j<bs; j++) { 1435 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1436 } 1437 } 1438 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1439 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1440 1441 /* store column indices (zero start index) */ 1442 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1443 count = 0; 1444 for (i=0; i<a->mbs; i++) { 1445 for (j=0; j<bs; j++) { 1446 for (k=a->i[i]; k<a->i[i+1]; k++) { 1447 for (l=0; l<bs; l++) { 1448 jj[count++] = bs*a->j[k] + l; 1449 } 1450 } 1451 } 1452 } 1453 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1454 ierr = PetscFree(jj);CHKERRQ(ierr); 1455 1456 /* store nonzero values */ 1457 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1458 count = 0; 1459 for (i=0; i<a->mbs; i++) { 1460 for (j=0; j<bs; j++) { 1461 for (k=a->i[i]; k<a->i[i+1]; k++) { 1462 for (l=0; l<bs; l++) { 1463 aa[count++] = a->a[bs2*k + l*bs + j]; 1464 } 1465 } 1466 } 1467 } 1468 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1469 ierr = PetscFree(aa);CHKERRQ(ierr); 1470 1471 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1472 if (file) { 1473 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1474 } 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1480 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1481 { 1482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1483 PetscErrorCode ierr; 1484 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1485 PetscViewerFormat format; 1486 1487 PetscFunctionBegin; 1488 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1489 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1490 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1491 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1492 Mat aij; 1493 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1494 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1495 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1496 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1497 PetscFunctionReturn(0); 1498 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1499 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1500 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr); 1501 for (i=0; i<a->mbs; i++) { 1502 for (j=0; j<bs; j++) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1504 for (k=a->i[i]; k<a->i[i+1]; k++) { 1505 for (l=0; l<bs; l++) { 1506 #if defined(PETSC_USE_COMPLEX) 1507 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1508 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1509 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1510 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1511 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1512 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1513 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1514 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1515 } 1516 #else 1517 if (a->a[bs2*k + l*bs + j] != 0.0) { 1518 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1519 } 1520 #endif 1521 } 1522 } 1523 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1524 } 1525 } 1526 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1527 } else { 1528 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1529 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr); 1530 for (i=0; i<a->mbs; i++) { 1531 for (j=0; j<bs; j++) { 1532 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1533 for (k=a->i[i]; k<a->i[i+1]; k++) { 1534 for (l=0; l<bs; l++) { 1535 #if defined(PETSC_USE_COMPLEX) 1536 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1537 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1538 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1539 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1541 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1542 } else { 1543 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1544 } 1545 #else 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1547 #endif 1548 } 1549 } 1550 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1551 } 1552 } 1553 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1554 } 1555 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #include <petscdraw.h> 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1562 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1563 { 1564 Mat A = (Mat) Aa; 1565 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1566 PetscErrorCode ierr; 1567 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1568 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1569 MatScalar *aa; 1570 PetscViewer viewer; 1571 PetscViewerFormat format; 1572 1573 PetscFunctionBegin; 1574 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1575 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1576 1577 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1578 1579 /* loop over matrix elements drawing boxes */ 1580 1581 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1582 color = PETSC_DRAW_BLUE; 1583 for (i=0,row=0; i<mbs; i++,row+=bs) { 1584 for (j=a->i[i]; j<a->i[i+1]; j++) { 1585 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1586 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1587 aa = a->a + j*bs2; 1588 for (k=0; k<bs; k++) { 1589 for (l=0; l<bs; l++) { 1590 if (PetscRealPart(*aa++) >= 0.) continue; 1591 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1592 } 1593 } 1594 } 1595 } 1596 color = PETSC_DRAW_CYAN; 1597 for (i=0,row=0; i<mbs; i++,row+=bs) { 1598 for (j=a->i[i]; j<a->i[i+1]; j++) { 1599 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1600 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1601 aa = a->a + j*bs2; 1602 for (k=0; k<bs; k++) { 1603 for (l=0; l<bs; l++) { 1604 if (PetscRealPart(*aa++) != 0.) continue; 1605 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1606 } 1607 } 1608 } 1609 } 1610 color = PETSC_DRAW_RED; 1611 for (i=0,row=0; i<mbs; i++,row+=bs) { 1612 for (j=a->i[i]; j<a->i[i+1]; j++) { 1613 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1614 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1615 aa = a->a + j*bs2; 1616 for (k=0; k<bs; k++) { 1617 for (l=0; l<bs; l++) { 1618 if (PetscRealPart(*aa++) <= 0.) continue; 1619 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1620 } 1621 } 1622 } 1623 } 1624 } else { 1625 /* use contour shading to indicate magnitude of values */ 1626 /* first determine max of all nonzero values */ 1627 PetscDraw popup; 1628 PetscReal scale,maxv = 0.0; 1629 1630 for (i=0; i<a->nz*a->bs2; i++) { 1631 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1632 } 1633 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1634 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1635 if (popup) { 1636 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1637 } 1638 for (i=0,row=0; i<mbs; i++,row+=bs) { 1639 for (j=a->i[i]; j<a->i[i+1]; j++) { 1640 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1641 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1642 aa = a->a + j*bs2; 1643 for (k=0; k<bs; k++) { 1644 for (l=0; l<bs; l++) { 1645 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1646 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1647 } 1648 } 1649 } 1650 } 1651 } 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1657 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1658 { 1659 PetscErrorCode ierr; 1660 PetscReal xl,yl,xr,yr,w,h; 1661 PetscDraw draw; 1662 PetscBool isnull; 1663 1664 PetscFunctionBegin; 1665 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1666 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1667 1668 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1669 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1670 xr += w; yr += h; xl = -w; yl = -h; 1671 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1672 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1673 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatView_SeqBAIJ" 1679 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1680 { 1681 PetscErrorCode ierr; 1682 PetscBool iascii,isbinary,isdraw; 1683 1684 PetscFunctionBegin; 1685 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1686 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1687 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1688 if (iascii) { 1689 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1690 } else if (isbinary) { 1691 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1692 } else if (isdraw) { 1693 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1694 } else { 1695 Mat B; 1696 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1697 ierr = MatView(B,viewer);CHKERRQ(ierr); 1698 ierr = MatDestroy(&B);CHKERRQ(ierr); 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1706 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1707 { 1708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1709 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1710 PetscInt *ai = a->i,*ailen = a->ilen; 1711 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1712 MatScalar *ap,*aa = a->a; 1713 1714 PetscFunctionBegin; 1715 for (k=0; k<m; k++) { /* loop over rows */ 1716 row = im[k]; brow = row/bs; 1717 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1718 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1719 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1720 nrow = ailen[brow]; 1721 for (l=0; l<n; l++) { /* loop over columns */ 1722 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1723 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1724 col = in[l]; 1725 bcol = col/bs; 1726 cidx = col%bs; 1727 ridx = row%bs; 1728 high = nrow; 1729 low = 0; /* assume unsorted */ 1730 while (high-low > 5) { 1731 t = (low+high)/2; 1732 if (rp[t] > bcol) high = t; 1733 else low = t; 1734 } 1735 for (i=low; i<high; i++) { 1736 if (rp[i] > bcol) break; 1737 if (rp[i] == bcol) { 1738 *v++ = ap[bs2*i+bs*cidx+ridx]; 1739 goto finished; 1740 } 1741 } 1742 *v++ = 0.0; 1743 finished:; 1744 } 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1751 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1752 { 1753 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1754 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1755 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1756 PetscErrorCode ierr; 1757 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1758 PetscBool roworiented=a->roworiented; 1759 const PetscScalar *value = v; 1760 MatScalar *ap,*aa = a->a,*bap; 1761 1762 PetscFunctionBegin; 1763 if (roworiented) { 1764 stepval = (n-1)*bs; 1765 } else { 1766 stepval = (m-1)*bs; 1767 } 1768 for (k=0; k<m; k++) { /* loop over added rows */ 1769 row = im[k]; 1770 if (row < 0) continue; 1771 #if defined(PETSC_USE_DEBUG) 1772 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1773 #endif 1774 rp = aj + ai[row]; 1775 ap = aa + bs2*ai[row]; 1776 rmax = imax[row]; 1777 nrow = ailen[row]; 1778 low = 0; 1779 high = nrow; 1780 for (l=0; l<n; l++) { /* loop over added columns */ 1781 if (in[l] < 0) continue; 1782 #if defined(PETSC_USE_DEBUG) 1783 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1784 #endif 1785 col = in[l]; 1786 if (roworiented) { 1787 value = v + (k*(stepval+bs) + l)*bs; 1788 } else { 1789 value = v + (l*(stepval+bs) + k)*bs; 1790 } 1791 if (col <= lastcol) low = 0; 1792 else high = nrow; 1793 lastcol = col; 1794 while (high-low > 7) { 1795 t = (low+high)/2; 1796 if (rp[t] > col) high = t; 1797 else low = t; 1798 } 1799 for (i=low; i<high; i++) { 1800 if (rp[i] > col) break; 1801 if (rp[i] == col) { 1802 bap = ap + bs2*i; 1803 if (roworiented) { 1804 if (is == ADD_VALUES) { 1805 for (ii=0; ii<bs; ii++,value+=stepval) { 1806 for (jj=ii; jj<bs2; jj+=bs) { 1807 bap[jj] += *value++; 1808 } 1809 } 1810 } else { 1811 for (ii=0; ii<bs; ii++,value+=stepval) { 1812 for (jj=ii; jj<bs2; jj+=bs) { 1813 bap[jj] = *value++; 1814 } 1815 } 1816 } 1817 } else { 1818 if (is == ADD_VALUES) { 1819 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1820 for (jj=0; jj<bs; jj++) { 1821 bap[jj] += value[jj]; 1822 } 1823 bap += bs; 1824 } 1825 } else { 1826 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1827 for (jj=0; jj<bs; jj++) { 1828 bap[jj] = value[jj]; 1829 } 1830 bap += bs; 1831 } 1832 } 1833 } 1834 goto noinsert2; 1835 } 1836 } 1837 if (nonew == 1) goto noinsert2; 1838 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1839 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1840 N = nrow++ - 1; high++; 1841 /* shift up all the later entries in this row */ 1842 for (ii=N; ii>=i; ii--) { 1843 rp[ii+1] = rp[ii]; 1844 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1845 } 1846 if (N >= i) { 1847 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1848 } 1849 rp[i] = col; 1850 bap = ap + bs2*i; 1851 if (roworiented) { 1852 for (ii=0; ii<bs; ii++,value+=stepval) { 1853 for (jj=ii; jj<bs2; jj+=bs) { 1854 bap[jj] = *value++; 1855 } 1856 } 1857 } else { 1858 for (ii=0; ii<bs; ii++,value+=stepval) { 1859 for (jj=0; jj<bs; jj++) { 1860 *bap++ = *value++; 1861 } 1862 } 1863 } 1864 noinsert2:; 1865 low = i; 1866 } 1867 ailen[row] = nrow; 1868 } 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1874 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1875 { 1876 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1877 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1878 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1879 PetscErrorCode ierr; 1880 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1881 MatScalar *aa = a->a,*ap; 1882 PetscReal ratio=0.6; 1883 1884 PetscFunctionBegin; 1885 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1886 1887 if (m) rmax = ailen[0]; 1888 for (i=1; i<mbs; i++) { 1889 /* move each row back by the amount of empty slots (fshift) before it*/ 1890 fshift += imax[i-1] - ailen[i-1]; 1891 rmax = PetscMax(rmax,ailen[i]); 1892 if (fshift) { 1893 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1894 N = ailen[i]; 1895 for (j=0; j<N; j++) { 1896 ip[j-fshift] = ip[j]; 1897 1898 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1899 } 1900 } 1901 ai[i] = ai[i-1] + ailen[i-1]; 1902 } 1903 if (mbs) { 1904 fshift += imax[mbs-1] - ailen[mbs-1]; 1905 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1906 } 1907 1908 /* reset ilen and imax for each row */ 1909 a->nonzerorowcnt = 0; 1910 for (i=0; i<mbs; i++) { 1911 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1912 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1913 } 1914 a->nz = ai[mbs]; 1915 1916 /* diagonals may have moved, so kill the diagonal pointers */ 1917 a->idiagvalid = PETSC_FALSE; 1918 if (fshift && a->diag) { 1919 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1920 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1921 a->diag = 0; 1922 } 1923 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1924 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1925 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1926 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1927 1928 A->info.mallocs += a->reallocs; 1929 a->reallocs = 0; 1930 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1931 1932 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1933 1934 A->same_nonzero = PETSC_TRUE; 1935 PetscFunctionReturn(0); 1936 } 1937 1938 /* 1939 This function returns an array of flags which indicate the locations of contiguous 1940 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1941 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1942 Assume: sizes should be long enough to hold all the values. 1943 */ 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1946 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1947 { 1948 PetscInt i,j,k,row; 1949 PetscBool flg; 1950 1951 PetscFunctionBegin; 1952 for (i=0,j=0; i<n; j++) { 1953 row = idx[i]; 1954 if (row%bs!=0) { /* Not the begining of a block */ 1955 sizes[j] = 1; 1956 i++; 1957 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1958 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1959 i++; 1960 } else { /* Begining of the block, so check if the complete block exists */ 1961 flg = PETSC_TRUE; 1962 for (k=1; k<bs; k++) { 1963 if (row+k != idx[i+k]) { /* break in the block */ 1964 flg = PETSC_FALSE; 1965 break; 1966 } 1967 } 1968 if (flg) { /* No break in the bs */ 1969 sizes[j] = bs; 1970 i += bs; 1971 } else { 1972 sizes[j] = 1; 1973 i++; 1974 } 1975 } 1976 } 1977 *bs_max = j; 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1983 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1984 { 1985 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1986 PetscErrorCode ierr; 1987 PetscInt i,j,k,count,*rows; 1988 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1989 PetscScalar zero = 0.0; 1990 MatScalar *aa; 1991 const PetscScalar *xx; 1992 PetscScalar *bb; 1993 1994 PetscFunctionBegin; 1995 /* fix right hand side if needed */ 1996 if (x && b) { 1997 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1998 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1999 for (i=0; i<is_n; i++) { 2000 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2001 } 2002 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2003 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2004 } 2005 2006 /* Make a copy of the IS and sort it */ 2007 /* allocate memory for rows,sizes */ 2008 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2009 2010 /* copy IS values to rows, and sort them */ 2011 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2012 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2013 2014 if (baij->keepnonzeropattern) { 2015 for (i=0; i<is_n; i++) sizes[i] = 1; 2016 bs_max = is_n; 2017 A->same_nonzero = PETSC_TRUE; 2018 } else { 2019 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2020 2021 A->same_nonzero = PETSC_FALSE; 2022 } 2023 2024 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2025 row = rows[j]; 2026 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2027 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2028 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2029 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2030 if (diag != (PetscScalar)0.0) { 2031 if (baij->ilen[row/bs] > 0) { 2032 baij->ilen[row/bs] = 1; 2033 baij->j[baij->i[row/bs]] = row/bs; 2034 2035 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2036 } 2037 /* Now insert all the diagonal values for this bs */ 2038 for (k=0; k<bs; k++) { 2039 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2040 } 2041 } else { /* (diag == 0.0) */ 2042 baij->ilen[row/bs] = 0; 2043 } /* end (diag == 0.0) */ 2044 } else { /* (sizes[i] != bs) */ 2045 #if defined(PETSC_USE_DEBUG) 2046 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2047 #endif 2048 for (k=0; k<count; k++) { 2049 aa[0] = zero; 2050 aa += bs; 2051 } 2052 if (diag != (PetscScalar)0.0) { 2053 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2054 } 2055 } 2056 } 2057 2058 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2059 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 #undef __FUNCT__ 2064 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2065 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2066 { 2067 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2068 PetscErrorCode ierr; 2069 PetscInt i,j,k,count; 2070 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2071 PetscScalar zero = 0.0; 2072 MatScalar *aa; 2073 const PetscScalar *xx; 2074 PetscScalar *bb; 2075 PetscBool *zeroed,vecs = PETSC_FALSE; 2076 2077 PetscFunctionBegin; 2078 /* fix right hand side if needed */ 2079 if (x && b) { 2080 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2081 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2082 vecs = PETSC_TRUE; 2083 } 2084 A->same_nonzero = PETSC_TRUE; 2085 2086 /* zero the columns */ 2087 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2088 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2089 for (i=0; i<is_n; i++) { 2090 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2091 zeroed[is_idx[i]] = PETSC_TRUE; 2092 } 2093 for (i=0; i<A->rmap->N; i++) { 2094 if (!zeroed[i]) { 2095 row = i/bs; 2096 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2097 for (k=0; k<bs; k++) { 2098 col = bs*baij->j[j] + k; 2099 if (zeroed[col]) { 2100 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2101 if (vecs) bb[i] -= aa[0]*xx[col]; 2102 aa[0] = 0.0; 2103 } 2104 } 2105 } 2106 } else if (vecs) bb[i] = diag*xx[i]; 2107 } 2108 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2109 if (vecs) { 2110 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2111 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2112 } 2113 2114 /* zero the rows */ 2115 for (i=0; i<is_n; i++) { 2116 row = is_idx[i]; 2117 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2118 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2119 for (k=0; k<count; k++) { 2120 aa[0] = zero; 2121 aa += bs; 2122 } 2123 if (diag != (PetscScalar)0.0) { 2124 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2125 } 2126 } 2127 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2133 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2134 { 2135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2136 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2137 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2138 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2139 PetscErrorCode ierr; 2140 PetscInt ridx,cidx,bs2=a->bs2; 2141 PetscBool roworiented=a->roworiented; 2142 MatScalar *ap,value,*aa=a->a,*bap; 2143 2144 PetscFunctionBegin; 2145 if (v) PetscValidScalarPointer(v,6); 2146 for (k=0; k<m; k++) { /* loop over added rows */ 2147 row = im[k]; 2148 brow = row/bs; 2149 if (row < 0) continue; 2150 #if defined(PETSC_USE_DEBUG) 2151 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2152 #endif 2153 rp = aj + ai[brow]; 2154 ap = aa + bs2*ai[brow]; 2155 rmax = imax[brow]; 2156 nrow = ailen[brow]; 2157 low = 0; 2158 high = nrow; 2159 for (l=0; l<n; l++) { /* loop over added columns */ 2160 if (in[l] < 0) continue; 2161 #if defined(PETSC_USE_DEBUG) 2162 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2163 #endif 2164 col = in[l]; bcol = col/bs; 2165 ridx = row % bs; cidx = col % bs; 2166 if (roworiented) { 2167 value = v[l + k*n]; 2168 } else { 2169 value = v[k + l*m]; 2170 } 2171 if (col <= lastcol) low = 0; else high = nrow; 2172 lastcol = col; 2173 while (high-low > 7) { 2174 t = (low+high)/2; 2175 if (rp[t] > bcol) high = t; 2176 else low = t; 2177 } 2178 for (i=low; i<high; i++) { 2179 if (rp[i] > bcol) break; 2180 if (rp[i] == bcol) { 2181 bap = ap + bs2*i + bs*cidx + ridx; 2182 if (is == ADD_VALUES) *bap += value; 2183 else *bap = value; 2184 goto noinsert1; 2185 } 2186 } 2187 if (nonew == 1) goto noinsert1; 2188 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2189 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2190 N = nrow++ - 1; high++; 2191 /* shift up all the later entries in this row */ 2192 for (ii=N; ii>=i; ii--) { 2193 rp[ii+1] = rp[ii]; 2194 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2195 } 2196 if (N>=i) { 2197 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2198 } 2199 rp[i] = bcol; 2200 ap[bs2*i + bs*cidx + ridx] = value; 2201 a->nz++; 2202 noinsert1:; 2203 low = i; 2204 } 2205 ailen[brow] = nrow; 2206 } 2207 A->same_nonzero = PETSC_FALSE; 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2213 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2214 { 2215 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2216 Mat outA; 2217 PetscErrorCode ierr; 2218 PetscBool row_identity,col_identity; 2219 2220 PetscFunctionBegin; 2221 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2222 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2223 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2224 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2225 2226 outA = inA; 2227 inA->factortype = MAT_FACTOR_LU; 2228 2229 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2230 2231 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2232 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2233 a->row = row; 2234 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2235 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2236 a->col = col; 2237 2238 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2239 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2240 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2241 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2242 2243 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2244 if (!a->solve_work) { 2245 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2246 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2247 } 2248 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2254 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2255 { 2256 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2257 PetscInt i,nz,mbs; 2258 2259 PetscFunctionBegin; 2260 nz = baij->maxnz; 2261 mbs = baij->mbs; 2262 for (i=0; i<nz; i++) { 2263 baij->j[i] = indices[i]; 2264 } 2265 baij->nz = nz; 2266 for (i=0; i<mbs; i++) { 2267 baij->ilen[i] = baij->imax[i]; 2268 } 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2274 /*@ 2275 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2276 in the matrix. 2277 2278 Input Parameters: 2279 + mat - the SeqBAIJ matrix 2280 - indices - the column indices 2281 2282 Level: advanced 2283 2284 Notes: 2285 This can be called if you have precomputed the nonzero structure of the 2286 matrix and want to provide it to the matrix object to improve the performance 2287 of the MatSetValues() operation. 2288 2289 You MUST have set the correct numbers of nonzeros per row in the call to 2290 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2291 2292 MUST be called before any calls to MatSetValues(); 2293 2294 @*/ 2295 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2296 { 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2301 PetscValidPointer(indices,2); 2302 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2308 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2309 { 2310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2311 PetscErrorCode ierr; 2312 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2313 PetscReal atmp; 2314 PetscScalar *x,zero = 0.0; 2315 MatScalar *aa; 2316 PetscInt ncols,brow,krow,kcol; 2317 2318 PetscFunctionBegin; 2319 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2320 bs = A->rmap->bs; 2321 aa = a->a; 2322 ai = a->i; 2323 aj = a->j; 2324 mbs = a->mbs; 2325 2326 ierr = VecSet(v,zero);CHKERRQ(ierr); 2327 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2328 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2329 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2330 for (i=0; i<mbs; i++) { 2331 ncols = ai[1] - ai[0]; ai++; 2332 brow = bs*i; 2333 for (j=0; j<ncols; j++) { 2334 for (kcol=0; kcol<bs; kcol++) { 2335 for (krow=0; krow<bs; krow++) { 2336 atmp = PetscAbsScalar(*aa);aa++; 2337 row = brow + krow; /* row index */ 2338 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2339 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2340 } 2341 } 2342 aj++; 2343 } 2344 } 2345 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatCopy_SeqBAIJ" 2351 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2352 { 2353 PetscErrorCode ierr; 2354 2355 PetscFunctionBegin; 2356 /* If the two matrices have the same copy implementation, use fast copy. */ 2357 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2358 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2359 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2360 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2361 2362 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2363 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2364 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2365 } else { 2366 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2367 } 2368 PetscFunctionReturn(0); 2369 } 2370 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2373 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2374 { 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2384 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2385 { 2386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2387 2388 PetscFunctionBegin; 2389 *array = a->a; 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2395 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2396 { 2397 PetscFunctionBegin; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2403 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2404 { 2405 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2408 PetscBLASInt one=1; 2409 2410 PetscFunctionBegin; 2411 if (str == SAME_NONZERO_PATTERN) { 2412 PetscScalar alpha = a; 2413 PetscBLASInt bnz; 2414 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2415 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2416 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2417 if (y->xtoy && y->XtoY != X) { 2418 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2419 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2420 } 2421 if (!y->xtoy) { /* get xtoy */ 2422 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2423 y->XtoY = X; 2424 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2425 } 2426 for (i=0; i<x->nz; i++) { 2427 j = 0; 2428 while (j < bs2) { 2429 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2430 j++; 2431 } 2432 } 2433 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2434 } else { 2435 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2436 } 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2442 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2443 { 2444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2445 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2446 MatScalar *aa = a->a; 2447 2448 PetscFunctionBegin; 2449 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2455 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2456 { 2457 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2458 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2459 MatScalar *aa = a->a; 2460 2461 PetscFunctionBegin; 2462 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2470 /* 2471 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2472 */ 2473 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2474 { 2475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2476 PetscErrorCode ierr; 2477 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2478 PetscInt nz = a->i[m],row,*jj,mr,col; 2479 2480 PetscFunctionBegin; 2481 *nn = n; 2482 if (!ia) PetscFunctionReturn(0); 2483 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2484 else { 2485 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2486 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2487 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2488 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2489 jj = a->j; 2490 for (i=0; i<nz; i++) { 2491 collengths[jj[i]]++; 2492 } 2493 cia[0] = oshift; 2494 for (i=0; i<n; i++) { 2495 cia[i+1] = cia[i] + collengths[i]; 2496 } 2497 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2498 jj = a->j; 2499 for (row=0; row<m; row++) { 2500 mr = a->i[row+1] - a->i[row]; 2501 for (i=0; i<mr; i++) { 2502 col = *jj++; 2503 2504 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2505 } 2506 } 2507 ierr = PetscFree(collengths);CHKERRQ(ierr); 2508 *ia = cia; *ja = cja; 2509 } 2510 PetscFunctionReturn(0); 2511 } 2512 2513 #undef __FUNCT__ 2514 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2515 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2516 { 2517 PetscErrorCode ierr; 2518 2519 PetscFunctionBegin; 2520 if (!ia) PetscFunctionReturn(0); 2521 ierr = PetscFree(*ia);CHKERRQ(ierr); 2522 ierr = PetscFree(*ja);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2528 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2529 { 2530 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2531 PetscErrorCode ierr; 2532 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2533 PetscScalar dx,*y,*xx,*w3_array; 2534 PetscScalar *vscale_array; 2535 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2536 Vec w1 = coloring->w1,w2=coloring->w2,w3; 2537 void *fctx = coloring->fctx; 2538 PetscBool flg = PETSC_FALSE; 2539 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2540 Vec x1_tmp; 2541 2542 PetscFunctionBegin; 2543 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2544 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2545 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2546 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2547 2548 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2549 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2550 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 2551 if (flg) { 2552 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2553 } else { 2554 PetscBool assembled; 2555 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2556 if (assembled) { 2557 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2558 } 2559 } 2560 2561 x1_tmp = x1; 2562 if (!coloring->vscale) { 2563 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2564 } 2565 2566 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2567 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2568 } 2569 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2570 2571 /* Set w1 = F(x1) */ 2572 if (!coloring->fset) { 2573 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2574 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2575 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2576 } else { 2577 coloring->fset = PETSC_FALSE; 2578 } 2579 2580 if (!coloring->w3) { 2581 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2582 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2583 } 2584 w3 = coloring->w3; 2585 2586 /* Compute all the local scale factors, including ghost points */ 2587 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2588 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2589 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2590 if (ctype == IS_COLORING_GHOSTED) { 2591 col_start = 0; col_end = N; 2592 } else if (ctype == IS_COLORING_GLOBAL) { 2593 xx = xx - start; 2594 vscale_array = vscale_array - start; 2595 col_start = start; col_end = N + start; 2596 } 2597 for (col=col_start; col<col_end; col++) { 2598 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2599 if (coloring->htype[0] == 'w') { 2600 dx = 1.0 + unorm; 2601 } else { 2602 dx = xx[col]; 2603 } 2604 if (dx == (PetscScalar)0.0) dx = 1.0; 2605 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2606 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2607 dx *= epsilon; 2608 vscale_array[col] = (PetscScalar)1.0/dx; 2609 } 2610 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2611 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2612 if (ctype == IS_COLORING_GLOBAL) { 2613 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2614 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2615 } 2616 if (coloring->vscaleforrow) { 2617 vscaleforrow = coloring->vscaleforrow; 2618 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2619 2620 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2621 /* 2622 Loop over each color 2623 */ 2624 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2625 for (k=0; k<coloring->ncolors; k++) { 2626 coloring->currentcolor = k; 2627 for (i=0; i<bs; i++) { 2628 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2629 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2630 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2631 /* 2632 Loop over each column associated with color 2633 adding the perturbation to the vector w3. 2634 */ 2635 for (l=0; l<coloring->ncolumns[k]; l++) { 2636 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2637 if (coloring->htype[0] == 'w') { 2638 dx = 1.0 + unorm; 2639 } else { 2640 dx = xx[col]; 2641 } 2642 if (dx == (PetscScalar)0.0) dx = 1.0; 2643 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2644 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2645 dx *= epsilon; 2646 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2647 w3_array[col] += dx; 2648 } 2649 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2650 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2651 2652 /* 2653 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2654 w2 = F(x1 + dx) - F(x1) 2655 */ 2656 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2657 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2658 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2659 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2660 2661 /* 2662 Loop over rows of vector, putting results into Jacobian matrix 2663 */ 2664 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2665 for (l=0; l<coloring->nrows[k]; l++) { 2666 row = bs*coloring->rows[k][l]; /* local row index */ 2667 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2668 for (j=0; j<bs; j++) { 2669 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2670 srows[j] = row + start + j; 2671 } 2672 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2673 } 2674 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2675 } 2676 } /* endof for each color */ 2677 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2678 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2679 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2680 ierr = PetscFree(srows);CHKERRQ(ierr); 2681 2682 coloring->currentcolor = -1; 2683 2684 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2685 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2686 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 /* -------------------------------------------------------------------*/ 2691 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2692 MatGetRow_SeqBAIJ, 2693 MatRestoreRow_SeqBAIJ, 2694 MatMult_SeqBAIJ_N, 2695 /* 4*/ MatMultAdd_SeqBAIJ_N, 2696 MatMultTranspose_SeqBAIJ, 2697 MatMultTransposeAdd_SeqBAIJ, 2698 0, 2699 0, 2700 0, 2701 /* 10*/ 0, 2702 MatLUFactor_SeqBAIJ, 2703 0, 2704 0, 2705 MatTranspose_SeqBAIJ, 2706 /* 15*/ MatGetInfo_SeqBAIJ, 2707 MatEqual_SeqBAIJ, 2708 MatGetDiagonal_SeqBAIJ, 2709 MatDiagonalScale_SeqBAIJ, 2710 MatNorm_SeqBAIJ, 2711 /* 20*/ 0, 2712 MatAssemblyEnd_SeqBAIJ, 2713 MatSetOption_SeqBAIJ, 2714 MatZeroEntries_SeqBAIJ, 2715 /* 24*/ MatZeroRows_SeqBAIJ, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /* 29*/ MatSetUp_SeqBAIJ, 2721 0, 2722 0, 2723 0, 2724 0, 2725 /* 34*/ MatDuplicate_SeqBAIJ, 2726 0, 2727 0, 2728 MatILUFactor_SeqBAIJ, 2729 0, 2730 /* 39*/ MatAXPY_SeqBAIJ, 2731 MatGetSubMatrices_SeqBAIJ, 2732 MatIncreaseOverlap_SeqBAIJ, 2733 MatGetValues_SeqBAIJ, 2734 MatCopy_SeqBAIJ, 2735 /* 44*/ 0, 2736 MatScale_SeqBAIJ, 2737 0, 2738 0, 2739 MatZeroRowsColumns_SeqBAIJ, 2740 /* 49*/ 0, 2741 MatGetRowIJ_SeqBAIJ, 2742 MatRestoreRowIJ_SeqBAIJ, 2743 MatGetColumnIJ_SeqBAIJ, 2744 MatRestoreColumnIJ_SeqBAIJ, 2745 /* 54*/ MatFDColoringCreate_SeqAIJ, 2746 0, 2747 0, 2748 0, 2749 MatSetValuesBlocked_SeqBAIJ, 2750 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2751 MatDestroy_SeqBAIJ, 2752 MatView_SeqBAIJ, 2753 0, 2754 0, 2755 /* 64*/ 0, 2756 0, 2757 0, 2758 0, 2759 0, 2760 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2761 0, 2762 MatConvert_Basic, 2763 0, 2764 0, 2765 /* 74*/ 0, 2766 MatFDColoringApply_BAIJ, 2767 0, 2768 0, 2769 0, 2770 /* 79*/ 0, 2771 0, 2772 0, 2773 0, 2774 MatLoad_SeqBAIJ, 2775 /* 84*/ 0, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /* 89*/ 0, 2781 0, 2782 0, 2783 0, 2784 0, 2785 /* 94*/ 0, 2786 0, 2787 0, 2788 0, 2789 0, 2790 /* 99*/ 0, 2791 0, 2792 0, 2793 0, 2794 0, 2795 /*104*/ 0, 2796 MatRealPart_SeqBAIJ, 2797 MatImaginaryPart_SeqBAIJ, 2798 0, 2799 0, 2800 /*109*/ 0, 2801 0, 2802 0, 2803 0, 2804 MatMissingDiagonal_SeqBAIJ, 2805 /*114*/ 0, 2806 0, 2807 0, 2808 0, 2809 0, 2810 /*119*/ 0, 2811 0, 2812 MatMultHermitianTranspose_SeqBAIJ, 2813 MatMultHermitianTransposeAdd_SeqBAIJ, 2814 0, 2815 /*124*/ 0, 2816 0, 2817 MatInvertBlockDiagonal_SeqBAIJ, 2818 0, 2819 0, 2820 /*129*/ 0, 2821 0, 2822 0, 2823 0, 2824 0, 2825 /*134*/ 0, 2826 0, 2827 0, 2828 0, 2829 0, 2830 /*139*/ 0, 2831 0 2832 }; 2833 2834 #undef __FUNCT__ 2835 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2836 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2837 { 2838 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2839 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2844 2845 /* allocate space for values if not already there */ 2846 if (!aij->saved_values) { 2847 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2848 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2849 } 2850 2851 /* copy values over */ 2852 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2853 PetscFunctionReturn(0); 2854 } 2855 2856 #undef __FUNCT__ 2857 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2858 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2859 { 2860 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2861 PetscErrorCode ierr; 2862 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2863 2864 PetscFunctionBegin; 2865 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2866 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2867 2868 /* copy values over */ 2869 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2870 PetscFunctionReturn(0); 2871 } 2872 2873 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2874 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2875 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2878 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2879 { 2880 Mat_SeqBAIJ *b; 2881 PetscErrorCode ierr; 2882 PetscInt i,mbs,nbs,bs2; 2883 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2884 2885 PetscFunctionBegin; 2886 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2887 if (nz == MAT_SKIP_ALLOCATION) { 2888 skipallocation = PETSC_TRUE; 2889 nz = 0; 2890 } 2891 2892 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2893 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2894 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2895 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2896 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2897 2898 B->preallocated = PETSC_TRUE; 2899 2900 mbs = B->rmap->n/bs; 2901 nbs = B->cmap->n/bs; 2902 bs2 = bs*bs; 2903 2904 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2905 2906 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2907 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2908 if (nnz) { 2909 for (i=0; i<mbs; i++) { 2910 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2911 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2912 } 2913 } 2914 2915 b = (Mat_SeqBAIJ*)B->data; 2916 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2917 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2918 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2919 2920 if (!flg) { 2921 switch (bs) { 2922 case 1: 2923 B->ops->mult = MatMult_SeqBAIJ_1; 2924 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2925 break; 2926 case 2: 2927 B->ops->mult = MatMult_SeqBAIJ_2; 2928 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2929 break; 2930 case 3: 2931 B->ops->mult = MatMult_SeqBAIJ_3; 2932 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2933 break; 2934 case 4: 2935 B->ops->mult = MatMult_SeqBAIJ_4; 2936 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2937 break; 2938 case 5: 2939 B->ops->mult = MatMult_SeqBAIJ_5; 2940 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2941 break; 2942 case 6: 2943 B->ops->mult = MatMult_SeqBAIJ_6; 2944 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2945 break; 2946 case 7: 2947 B->ops->mult = MatMult_SeqBAIJ_7; 2948 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2949 break; 2950 case 15: 2951 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2952 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2953 break; 2954 default: 2955 B->ops->mult = MatMult_SeqBAIJ_N; 2956 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2957 break; 2958 } 2959 } 2960 B->ops->sor = MatSOR_SeqBAIJ; 2961 b->mbs = mbs; 2962 b->nbs = nbs; 2963 if (!skipallocation) { 2964 if (!b->imax) { 2965 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2966 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2967 2968 b->free_imax_ilen = PETSC_TRUE; 2969 } 2970 /* b->ilen will count nonzeros in each block row so far. */ 2971 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2972 if (!nnz) { 2973 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2974 else if (nz < 0) nz = 1; 2975 for (i=0; i<mbs; i++) b->imax[i] = nz; 2976 nz = nz*mbs; 2977 } else { 2978 nz = 0; 2979 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2980 } 2981 2982 /* allocate the matrix space */ 2983 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2984 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2985 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2986 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2987 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2988 2989 b->singlemalloc = PETSC_TRUE; 2990 b->i[0] = 0; 2991 for (i=1; i<mbs+1; i++) { 2992 b->i[i] = b->i[i-1] + b->imax[i-1]; 2993 } 2994 b->free_a = PETSC_TRUE; 2995 b->free_ij = PETSC_TRUE; 2996 } else { 2997 b->free_a = PETSC_FALSE; 2998 b->free_ij = PETSC_FALSE; 2999 } 3000 3001 b->bs2 = bs2; 3002 b->mbs = mbs; 3003 b->nz = 0; 3004 b->maxnz = nz; 3005 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3006 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3012 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3013 { 3014 PetscInt i,m,nz,nz_max=0,*nnz; 3015 PetscScalar *values=0; 3016 PetscErrorCode ierr; 3017 3018 PetscFunctionBegin; 3019 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3020 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3021 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3022 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3023 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3024 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3025 m = B->rmap->n/bs; 3026 3027 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3028 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3029 for (i=0; i<m; i++) { 3030 nz = ii[i+1]- ii[i]; 3031 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3032 nz_max = PetscMax(nz_max, nz); 3033 nnz[i] = nz; 3034 } 3035 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3036 ierr = PetscFree(nnz);CHKERRQ(ierr); 3037 3038 values = (PetscScalar*)V; 3039 if (!values) { 3040 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3041 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3042 } 3043 for (i=0; i<m; i++) { 3044 PetscInt ncols = ii[i+1] - ii[i]; 3045 const PetscInt *icols = jj + ii[i]; 3046 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3047 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3048 } 3049 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3050 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3051 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3052 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3053 PetscFunctionReturn(0); 3054 } 3055 3056 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3057 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3058 #if defined(PETSC_HAVE_MUMPS) 3059 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3060 #endif 3061 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 3062 3063 /*MC 3064 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3065 block sparse compressed row format. 3066 3067 Options Database Keys: 3068 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3069 3070 Level: beginner 3071 3072 .seealso: MatCreateSeqBAIJ() 3073 M*/ 3074 3075 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3076 3077 #undef __FUNCT__ 3078 #define __FUNCT__ "MatCreate_SeqBAIJ" 3079 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3080 { 3081 PetscErrorCode ierr; 3082 PetscMPIInt size; 3083 Mat_SeqBAIJ *b; 3084 3085 PetscFunctionBegin; 3086 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3087 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3088 3089 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3090 B->data = (void*)b; 3091 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3092 3093 b->row = 0; 3094 b->col = 0; 3095 b->icol = 0; 3096 b->reallocs = 0; 3097 b->saved_values = 0; 3098 3099 b->roworiented = PETSC_TRUE; 3100 b->nonew = 0; 3101 b->diag = 0; 3102 b->solve_work = 0; 3103 b->mult_work = 0; 3104 B->spptr = 0; 3105 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3106 b->keepnonzeropattern = PETSC_FALSE; 3107 b->xtoy = 0; 3108 b->XtoY = 0; 3109 B->same_nonzero = PETSC_FALSE; 3110 3111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3114 #if defined(PETSC_HAVE_MUMPS) 3115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3116 #endif 3117 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3120 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3122 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3123 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3124 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3125 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3126 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3127 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3128 PetscFunctionReturn(0); 3129 } 3130 3131 #undef __FUNCT__ 3132 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3133 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3134 { 3135 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3136 PetscErrorCode ierr; 3137 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3138 3139 PetscFunctionBegin; 3140 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3141 3142 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3143 c->imax = a->imax; 3144 c->ilen = a->ilen; 3145 c->free_imax_ilen = PETSC_FALSE; 3146 } else { 3147 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3148 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3149 for (i=0; i<mbs; i++) { 3150 c->imax[i] = a->imax[i]; 3151 c->ilen[i] = a->ilen[i]; 3152 } 3153 c->free_imax_ilen = PETSC_TRUE; 3154 } 3155 3156 /* allocate the matrix space */ 3157 if (mallocmatspace) { 3158 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3159 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3160 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3161 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3162 3163 c->i = a->i; 3164 c->j = a->j; 3165 c->singlemalloc = PETSC_FALSE; 3166 c->free_a = PETSC_TRUE; 3167 c->free_ij = PETSC_FALSE; 3168 c->parent = A; 3169 C->preallocated = PETSC_TRUE; 3170 C->assembled = PETSC_TRUE; 3171 3172 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3173 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3174 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3175 } else { 3176 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3177 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3178 3179 c->singlemalloc = PETSC_TRUE; 3180 c->free_a = PETSC_TRUE; 3181 c->free_ij = PETSC_TRUE; 3182 3183 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3184 if (mbs > 0) { 3185 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3186 if (cpvalues == MAT_COPY_VALUES) { 3187 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3188 } else { 3189 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3190 } 3191 } 3192 C->preallocated = PETSC_TRUE; 3193 C->assembled = PETSC_TRUE; 3194 } 3195 } 3196 3197 c->roworiented = a->roworiented; 3198 c->nonew = a->nonew; 3199 3200 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3201 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3202 3203 c->bs2 = a->bs2; 3204 c->mbs = a->mbs; 3205 c->nbs = a->nbs; 3206 3207 if (a->diag) { 3208 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3209 c->diag = a->diag; 3210 c->free_diag = PETSC_FALSE; 3211 } else { 3212 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3213 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3214 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3215 c->free_diag = PETSC_TRUE; 3216 } 3217 } else c->diag = 0; 3218 3219 c->nz = a->nz; 3220 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3221 c->solve_work = 0; 3222 c->mult_work = 0; 3223 3224 c->compressedrow.use = a->compressedrow.use; 3225 c->compressedrow.nrows = a->compressedrow.nrows; 3226 c->compressedrow.check = a->compressedrow.check; 3227 if (a->compressedrow.use) { 3228 i = a->compressedrow.nrows; 3229 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3230 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3231 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3232 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3233 } else { 3234 c->compressedrow.use = PETSC_FALSE; 3235 c->compressedrow.i = NULL; 3236 c->compressedrow.rindex = NULL; 3237 } 3238 C->same_nonzero = A->same_nonzero; 3239 3240 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3241 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3242 PetscFunctionReturn(0); 3243 } 3244 3245 #undef __FUNCT__ 3246 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3247 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3248 { 3249 PetscErrorCode ierr; 3250 3251 PetscFunctionBegin; 3252 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3253 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3254 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3255 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3256 PetscFunctionReturn(0); 3257 } 3258 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "MatLoad_SeqBAIJ" 3261 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3262 { 3263 Mat_SeqBAIJ *a; 3264 PetscErrorCode ierr; 3265 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3266 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3267 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3268 PetscInt *masked,nmask,tmp,bs2,ishift; 3269 PetscMPIInt size; 3270 int fd; 3271 PetscScalar *aa; 3272 MPI_Comm comm; 3273 3274 PetscFunctionBegin; 3275 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3276 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3277 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3278 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3279 bs2 = bs*bs; 3280 3281 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3282 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3283 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3284 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3285 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3286 M = header[1]; N = header[2]; nz = header[3]; 3287 3288 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3289 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3290 3291 /* 3292 This code adds extra rows to make sure the number of rows is 3293 divisible by the blocksize 3294 */ 3295 mbs = M/bs; 3296 extra_rows = bs - M + bs*(mbs); 3297 if (extra_rows == bs) extra_rows = 0; 3298 else mbs++; 3299 if (extra_rows) { 3300 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3301 } 3302 3303 /* Set global sizes if not already set */ 3304 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3305 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3306 } else { /* Check if the matrix global sizes are correct */ 3307 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3308 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3309 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3310 } 3311 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3312 } 3313 3314 /* read in row lengths */ 3315 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3316 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3317 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3318 3319 /* read in column indices */ 3320 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3321 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3322 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3323 3324 /* loop over row lengths determining block row lengths */ 3325 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3326 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3327 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3328 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3329 rowcount = 0; 3330 nzcount = 0; 3331 for (i=0; i<mbs; i++) { 3332 nmask = 0; 3333 for (j=0; j<bs; j++) { 3334 kmax = rowlengths[rowcount]; 3335 for (k=0; k<kmax; k++) { 3336 tmp = jj[nzcount++]/bs; 3337 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3338 } 3339 rowcount++; 3340 } 3341 browlengths[i] += nmask; 3342 /* zero out the mask elements we set */ 3343 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3344 } 3345 3346 /* Do preallocation */ 3347 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3348 a = (Mat_SeqBAIJ*)newmat->data; 3349 3350 /* set matrix "i" values */ 3351 a->i[0] = 0; 3352 for (i=1; i<= mbs; i++) { 3353 a->i[i] = a->i[i-1] + browlengths[i-1]; 3354 a->ilen[i-1] = browlengths[i-1]; 3355 } 3356 a->nz = 0; 3357 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3358 3359 /* read in nonzero values */ 3360 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3361 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3362 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3363 3364 /* set "a" and "j" values into matrix */ 3365 nzcount = 0; jcount = 0; 3366 for (i=0; i<mbs; i++) { 3367 nzcountb = nzcount; 3368 nmask = 0; 3369 for (j=0; j<bs; j++) { 3370 kmax = rowlengths[i*bs+j]; 3371 for (k=0; k<kmax; k++) { 3372 tmp = jj[nzcount++]/bs; 3373 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3374 } 3375 } 3376 /* sort the masked values */ 3377 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3378 3379 /* set "j" values into matrix */ 3380 maskcount = 1; 3381 for (j=0; j<nmask; j++) { 3382 a->j[jcount++] = masked[j]; 3383 mask[masked[j]] = maskcount++; 3384 } 3385 /* set "a" values into matrix */ 3386 ishift = bs2*a->i[i]; 3387 for (j=0; j<bs; j++) { 3388 kmax = rowlengths[i*bs+j]; 3389 for (k=0; k<kmax; k++) { 3390 tmp = jj[nzcountb]/bs; 3391 block = mask[tmp] - 1; 3392 point = jj[nzcountb] - bs*tmp; 3393 idx = ishift + bs2*block + j + bs*point; 3394 a->a[idx] = (MatScalar)aa[nzcountb++]; 3395 } 3396 } 3397 /* zero out the mask elements we set */ 3398 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3399 } 3400 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3401 3402 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3403 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3404 ierr = PetscFree(aa);CHKERRQ(ierr); 3405 ierr = PetscFree(jj);CHKERRQ(ierr); 3406 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3407 3408 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3409 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3410 PetscFunctionReturn(0); 3411 } 3412 3413 #undef __FUNCT__ 3414 #define __FUNCT__ "MatCreateSeqBAIJ" 3415 /*@C 3416 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3417 compressed row) format. For good matrix assembly performance the 3418 user should preallocate the matrix storage by setting the parameter nz 3419 (or the array nnz). By setting these parameters accurately, performance 3420 during matrix assembly can be increased by more than a factor of 50. 3421 3422 Collective on MPI_Comm 3423 3424 Input Parameters: 3425 + comm - MPI communicator, set to PETSC_COMM_SELF 3426 . bs - size of block 3427 . m - number of rows 3428 . n - number of columns 3429 . nz - number of nonzero blocks per block row (same for all rows) 3430 - nnz - array containing the number of nonzero blocks in the various block rows 3431 (possibly different for each block row) or NULL 3432 3433 Output Parameter: 3434 . A - the matrix 3435 3436 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3437 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3438 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3439 3440 Options Database Keys: 3441 . -mat_no_unroll - uses code that does not unroll the loops in the 3442 block calculations (much slower) 3443 . -mat_block_size - size of the blocks to use 3444 3445 Level: intermediate 3446 3447 Notes: 3448 The number of rows and columns must be divisible by blocksize. 3449 3450 If the nnz parameter is given then the nz parameter is ignored 3451 3452 A nonzero block is any block that as 1 or more nonzeros in it 3453 3454 The block AIJ format is fully compatible with standard Fortran 77 3455 storage. That is, the stored row and column indices can begin at 3456 either one (as in Fortran) or zero. See the users' manual for details. 3457 3458 Specify the preallocated storage with either nz or nnz (not both). 3459 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3460 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3461 matrices. 3462 3463 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3464 @*/ 3465 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3466 { 3467 PetscErrorCode ierr; 3468 3469 PetscFunctionBegin; 3470 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3471 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3472 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3473 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3474 PetscFunctionReturn(0); 3475 } 3476 3477 #undef __FUNCT__ 3478 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3479 /*@C 3480 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3481 per row in the matrix. For good matrix assembly performance the 3482 user should preallocate the matrix storage by setting the parameter nz 3483 (or the array nnz). By setting these parameters accurately, performance 3484 during matrix assembly can be increased by more than a factor of 50. 3485 3486 Collective on MPI_Comm 3487 3488 Input Parameters: 3489 + A - the matrix 3490 . bs - size of block 3491 . nz - number of block nonzeros per block row (same for all rows) 3492 - nnz - array containing the number of block nonzeros in the various block rows 3493 (possibly different for each block row) or NULL 3494 3495 Options Database Keys: 3496 . -mat_no_unroll - uses code that does not unroll the loops in the 3497 block calculations (much slower) 3498 . -mat_block_size - size of the blocks to use 3499 3500 Level: intermediate 3501 3502 Notes: 3503 If the nnz parameter is given then the nz parameter is ignored 3504 3505 You can call MatGetInfo() to get information on how effective the preallocation was; 3506 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3507 You can also run with the option -info and look for messages with the string 3508 malloc in them to see if additional memory allocation was needed. 3509 3510 The block AIJ format is fully compatible with standard Fortran 77 3511 storage. That is, the stored row and column indices can begin at 3512 either one (as in Fortran) or zero. See the users' manual for details. 3513 3514 Specify the preallocated storage with either nz or nnz (not both). 3515 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3516 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3517 3518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3519 @*/ 3520 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3521 { 3522 PetscErrorCode ierr; 3523 3524 PetscFunctionBegin; 3525 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3526 PetscValidType(B,1); 3527 PetscValidLogicalCollectiveInt(B,bs,2); 3528 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3529 PetscFunctionReturn(0); 3530 } 3531 3532 #undef __FUNCT__ 3533 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3534 /*@C 3535 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3536 (the default sequential PETSc format). 3537 3538 Collective on MPI_Comm 3539 3540 Input Parameters: 3541 + A - the matrix 3542 . i - the indices into j for the start of each local row (starts with zero) 3543 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3544 - v - optional values in the matrix 3545 3546 Level: developer 3547 3548 .keywords: matrix, aij, compressed row, sparse 3549 3550 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3551 @*/ 3552 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3553 { 3554 PetscErrorCode ierr; 3555 3556 PetscFunctionBegin; 3557 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3558 PetscValidType(B,1); 3559 PetscValidLogicalCollectiveInt(B,bs,2); 3560 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3561 PetscFunctionReturn(0); 3562 } 3563 3564 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3567 /*@ 3568 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3569 3570 Collective on MPI_Comm 3571 3572 Input Parameters: 3573 + comm - must be an MPI communicator of size 1 3574 . bs - size of block 3575 . m - number of rows 3576 . n - number of columns 3577 . i - row indices 3578 . j - column indices 3579 - a - matrix values 3580 3581 Output Parameter: 3582 . mat - the matrix 3583 3584 Level: advanced 3585 3586 Notes: 3587 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3588 once the matrix is destroyed 3589 3590 You cannot set new nonzero locations into this matrix, that will generate an error. 3591 3592 The i and j indices are 0 based 3593 3594 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3595 3596 3597 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3598 3599 @*/ 3600 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3601 { 3602 PetscErrorCode ierr; 3603 PetscInt ii; 3604 Mat_SeqBAIJ *baij; 3605 3606 PetscFunctionBegin; 3607 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3608 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3609 3610 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3611 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3612 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3613 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3614 baij = (Mat_SeqBAIJ*)(*mat)->data; 3615 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3616 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3617 3618 baij->i = i; 3619 baij->j = j; 3620 baij->a = a; 3621 3622 baij->singlemalloc = PETSC_FALSE; 3623 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3624 baij->free_a = PETSC_FALSE; 3625 baij->free_ij = PETSC_FALSE; 3626 3627 for (ii=0; ii<m; ii++) { 3628 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3629 #if defined(PETSC_USE_DEBUG) 3630 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3631 #endif 3632 } 3633 #if defined(PETSC_USE_DEBUG) 3634 for (ii=0; ii<baij->i[m]; ii++) { 3635 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3636 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3637 } 3638 #endif 3639 3640 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3641 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3642 PetscFunctionReturn(0); 3643 } 3644